flickrtouchr.py

   1 #!/usr/bin/env python
   2
   3 #
   4 # FlickrTouchr - a simple python script to grab all your photos from flickr,
   5 #                dump into a directory - organised into folders by set -
   6 #                along with any favourites you have saved.
   7 #
   8 #                You can then sync the photos to an iPod touch.
   9 #
  10 # Version:       1.2
  11 #
  12 # Original Author:      colm - AT - allcosts.net  - Colm MacCarthaigh - 2008-01-21
  13 #
  14 # Modified by:                  Dan Benjamin - http://hivelogic.com
  15 #
  16 # License:                      Apache 2.0 - http://www.apache.org/licenses/LICENSE-2.0.html
  17 #
  18
  19 import xml.dom.minidom
  20 import webbrowser
  21 import urlparse
  22 import urllib2
  23 import unicodedata
  24 import cPickle
  25 import md5
  26 import sys
  27 import os
  28
  29 API_KEY       = "e224418b91b4af4e8cdb0564716fa9bd"
  30 SHARED_SECRET = "7cddb9c9716501a0"
  31
  32 #
  33 # Utility functions for dealing with flickr authentication
  34 #
  35 def getText(nodelist):
  36     rc = ""
  37     for node in nodelist:
  38         if node.nodeType == node.TEXT_NODE:
  39             rc = rc + node.data
  40     return rc.encode("utf-8")
  41
  42 #
  43 # Get the frob based on our API_KEY and shared secret
  44 #
  45 def getfrob():
  46     # Create our signing string
  47     string = SHARED_SECRET + "api_key" + API_KEY + "methodflickr.auth.getFrob"
  48     hash   = md5.new(string).digest().encode("hex")
  49
  50     # Formulate the request
  51     url    = "http://api.flickr.com/services/rest/?method=flickr.auth.getFrob"
  52     url   += "&api_key=" + API_KEY + "&api_sig=" + hash
  53
  54     try:
  55         # Make the request and extract the frob
  56         response = urllib2.urlopen(url)
  57
  58         # Parse the XML
  59         dom = xml.dom.minidom.parse(response)
  60
  61         # get the frob
  62         frob = getText(dom.getElementsByTagName("frob")[0].childNodes)
  63
  64         # Free the DOM
  65         dom.unlink()
  66
  67         # Return the frob
  68         return frob
  69
  70     except:
  71         raise "Could not retrieve frob"
  72
  73 #
  74 # Login and get a token
  75 #
  76 def froblogin(frob, perms):
  77     string = SHARED_SECRET + "api_key" + API_KEY + "frob" + frob + "perms" + perms
  78     hash   = md5.new(string).digest().encode("hex")
  79
  80     # Formulate the request
  81     url    = "http://api.flickr.com/services/auth/?"
  82     url   += "api_key=" + API_KEY + "&perms=" + perms
  83     url   += "&frob=" + frob + "&api_sig=" + hash
  84
  85     # Tell the user what's happening
  86     print "In order to allow FlickrTouchr to read your photos and favourites"
  87     print "you need to allow the application. Please press return when you've"
  88     print "granted access at the following url (which should have opened"
  89     print "automatically)."
  90     print
  91     print url
  92     print
  93     print "Waiting for you to press return"
  94
  95     # We now have a login url, open it in a web-browser
  96     webbrowser.open_new(url)
  97
  98     # Wait for input
  99     sys.stdin.readline()
 100
 101     # Now, try and retrieve a token
 102     string = SHARED_SECRET + "api_key" + API_KEY + "frob" + frob + "methodflickr.auth.getToken"
 103     hash   = md5.new(string).digest().encode("hex")
 104
 105     # Formulate the request
 106     url    = "http://api.flickr.com/services/rest/?method=flickr.auth.getToken"
 107     url   += "&api_key=" + API_KEY + "&frob=" + frob
 108     url   += "&api_sig=" + hash
 109
 110     # See if we get a token
 111     try:
 112         # Make the request and extract the frob
 113         response = urllib2.urlopen(url)
 114
 115         # Parse the XML
 116         dom = xml.dom.minidom.parse(response)
 117
 118         # get the token and user-id
 119         token = getText(dom.getElementsByTagName("token")[0].childNodes)
 120         nsid  = dom.getElementsByTagName("user")[0].getAttribute("nsid")
 121
 122         # Free the DOM
 123         dom.unlink()
 124
 125         # Return the token and userid
 126         return (nsid, token)
 127     except:
 128         raise "Login failed"
 129
 130 #
 131 # Sign an arbitrary flickr request with a token
 132 #
 133 def flickrsign(url, token):
 134     query  = urlparse.urlparse(url).query
 135     query += "&api_key=" + API_KEY + "&auth_token=" + token
 136     params = query.split('&')
 137
 138     # Create the string to hash
 139     string = SHARED_SECRET
 140
 141     # Sort the arguments alphabettically
 142     params.sort()
 143     for param in params:
 144         string += param.replace('=', '')
 145     hash   = md5.new(string).digest().encode("hex")
 146
 147     # Now, append the api_key, and the api_sig args
 148     url += "&api_key=" + API_KEY + "&auth_token=" + token + "&api_sig=" + hash
 149
 150     # Return the signed url
 151     return url
 152
 153 #
 154 # Grab the photo from the server
 155 #
 156 def getphoto(id, token, filename):
 157     try:
 158         # Contruct a request to find the sizes
 159         url  = "http://api.flickr.com/services/rest/?method=flickr.photos.getSizes"
 160         url += "&photo_id=" + id
 161
 162         # Sign the request
 163         url = flickrsign(url, token)
 164
 165         # Make the request
 166         response = urllib2.urlopen(url)
 167
 168         # Parse the XML
 169         dom = xml.dom.minidom.parse(response)
 170
 171         # Get the list of sizes
 172         sizes =  dom.getElementsByTagName("size")
 173
 174         # Grab the original if it exists
 175         if (sizes[-1].getAttribute("label") == "Original"):
 176           imgurl = sizes[-1].getAttribute("source")
 177         else:
 178           print "Failed to get original for photo id " + id
 179
 180
 181         # Free the DOM memory
 182         dom.unlink()
 183
 184         # Grab the image file
 185         response = urllib2.urlopen(imgurl)
 186         data = response.read()
 187
 188         # Save the file!
 189         fh = open(filename, "w")
 190         fh.write(data)
 191         fh.close()
 192
 193         return filename
 194     except:
 195         print "Failed to retrieve photo id " + id
 196
 197 ######## Main Application ##########
 198 if __name__ == '__main__':
 199
 200     # The first, and only argument needs to be a directory
 201     try:
 202         os.chdir(sys.argv[1])
 203     except:
 204         print "usage: %s directory" % sys.argv[0]
 205         sys.exit(1)
 206
 207     # First things first, see if we have a cached user and auth-token
 208     try:
 209         cache = open("touchr.frob.cache", "r")
 210         config = cPickle.load(cache)
 211         cache.close()
 212
 213     # We don't - get a new one
 214     except:
 215         (user, token) = froblogin(getfrob(), "read")
 216         config = { "version":1 , "user":user, "token":token }
 217
 218         # Save it for future use
 219         cache = open("touchr.frob.cache", "w")
 220         cPickle.dump(config, cache)
 221         cache.close()
 222
 223     # Now, construct a query for the list of photo sets
 224     url  = "http://api.flickr.com/services/rest/?method=flickr.photosets.getList"
 225     url += "&user_id=" + config["user"]
 226     url  = flickrsign(url, config["token"])
 227
 228     # get the result
 229     response = urllib2.urlopen(url)
 230
 231     # Parse the XML
 232     dom = xml.dom.minidom.parse(response)
 233
 234     # Get the list of Sets
 235     sets =  dom.getElementsByTagName("photoset")
 236
 237     # For each set - create a url
 238     urls = []
 239     for set in sets:
 240         pid = set.getAttribute("id")
 241         dir = getText(set.getElementsByTagName("title")[0].childNodes)
 242         dir = unicodedata.normalize('NFKD', dir.decode("utf-8", "ignore")).encode('ASCII', 'ignore') # Normalize to ASCII
 243
 244         # Build the list of photos
 245         url   = "http://api.flickr.com/services/rest/?method=flickr.photosets.getPhotos"
 246         url  += "&photoset_id=" + pid
 247
 248         # Append to our list of urls
 249         urls.append( (url , dir) )
 250
 251     # Free the DOM memory
 252     dom.unlink()
 253
 254     # Add the photos which are not in any set
 255     url   = "http://api.flickr.com/services/rest/?method=flickr.photos.getNotInSet"
 256     urls.append( (url, "No Set") )
 257
 258     # Add the user's Favourites
 259     url   = "http://api.flickr.com/services/rest/?method=flickr.favorites.getList"
 260     urls.append( (url, "Favourites") )
 261
 262     # Time to get the photos
 263     inodes = {}
 264     for (url , dir) in urls:
 265         # Create the directory
 266         try:
 267             os.makedirs(dir)
 268         except:
 269             pass
 270
 271         # Get 500 results per page
 272         url += "&per_page=500"
 273         pages = page = 1
 274
 275         while page <= pages:
 276             request = url + "&page=" + str(page)
 277
 278             # Sign the url
 279             request = flickrsign(request, config["token"])
 280
 281             # Make the request
 282             response = urllib2.urlopen(request)
 283
 284             # Parse the XML
 285             dom = xml.dom.minidom.parse(response)
 286
 287             # Get the total
 288             pages = int(dom.getElementsByTagName("photo")[0].parentNode.getAttribute("pages"))
 289
 290             # Grab the photos
 291             for photo in dom.getElementsByTagName("photo"):
 292                 # Tell the user we're grabbing the file
 293                 print photo.getAttribute("title").encode("utf8") + " ... in set ... " + dir
 294
 295                 # Grab the id
 296                 photoid = photo.getAttribute("id")
 297
 298                 # The target
 299                 target = dir + "/" + photoid + ".jpg"
 300
 301                 # Skip files that exist
 302                 if os.access(target, os.R_OK):
 303                     continue
 304
 305                 # Look it up in our dictionary of inodes first
 306                 if inodes.has_key(photoid) and os.access(inodes[photoid], os.R_OK):
 307                     # woo, we have it already, use a hard-link
 308                     os.link(inodes[photoid], target)
 309                 else:
 310                     inodes[photoid] = getphoto(photo.getAttribute("id"), config["token"], target)
 311
 312             # Move on the next page
 313             page = page + 1