flickrtouchr.py

   1 #!/usr/bin/env python
   2
   3 #
   4 # FlickrTouchr - a simple python script to grab all your photos from flickr,
   5 #                dump into a directory - organised into folders by set -
   6 #                along with any favourites you have saved.
   7 #
   8 #                You can then sync the photos to an iPod touch.
   9 #
  10 # Version:       1.2
  11 #
  12 # Original Author:      colm - AT - allcosts.net  - Colm MacCarthaigh - 2008-01-21
  13 #
  14 # Modified by:                  Dan Benjamin - http://hivelogic.com
  15 #
  16 # License:                      Apache 2.0 - http://www.apache.org/licenses/LICENSE-2.0.html
  17 #
  18
  19 import xml.dom.minidom
  20 import webbrowser
  21 import urlparse
  22 import urllib2
  23 import unicodedata
  24 import cPickle
  25 import md5
  26 import sys
  27 import os
  28
# Flickr API credentials for this application.  API_KEY is sent as the
# "api_key" parameter of the API requests built below; SHARED_SECRET is
# prepended to the string that is MD5-hashed to produce each request's
# "api_sig" signature.
API_KEY       = "e224418b91b4af4e8cdb0564716fa9bd"
SHARED_SECRET = "7cddb9c9716501a0"
  31
  32 #
  33 # Utility functions for dealing with flickr authentication
  34 #
  35 def getText(nodelist):
  36     rc = ""
  37     for node in nodelist:
  38         if node.nodeType == node.TEXT_NODE:
  39             rc = rc + node.data
  40     return rc.encode("utf-8")
  41
  42 #
  43 # Get the frob based on our API_KEY and shared secret
  44 #
  45 def getfrob():
  46     # Create our signing string
  47     string = SHARED_SECRET + "api_key" + API_KEY + "methodflickr.auth.getFrob"
  48     hash   = md5.new(string).digest().encode("hex")
  49
  50     # Formulate the request
  51     url    = "http://api.flickr.com/services/rest/?method=flickr.auth.getFrob"
  52     url   += "&api_key=" + API_KEY + "&api_sig=" + hash
  53
  54     try:
  55         # Make the request and extract the frob
  56         response = urllib2.urlopen(url)
  57
  58         # Parse the XML
  59         dom = xml.dom.minidom.parse(response)
  60
  61         # get the frob
  62         frob = getText(dom.getElementsByTagName("frob")[0].childNodes)
  63
  64         # Free the DOM
  65         dom.unlink()
  66
  67         # Return the frob
  68         return frob
  69
  70     except:
  71         raise "Could not retrieve frob"
  72
  73 #
  74 # Login and get a token
  75 #
  76 def froblogin(frob, perms):
  77     string = SHARED_SECRET + "api_key" + API_KEY + "frob" + frob + "perms" + perms
  78     hash   = md5.new(string).digest().encode("hex")
  79
  80     # Formulate the request
  81     url    = "http://api.flickr.com/services/auth/?"
  82     url   += "api_key=" + API_KEY + "&perms=" + perms
  83     url   += "&frob=" + frob + "&api_sig=" + hash
  84
  85     # Tell the user what's happening
  86     print "In order to allow FlickrTouchr to read your photos and favourites"
  87     print "you need to allow the application. Please press return when you've"
  88     print "granted access at the following url (which should have opened"
  89     print "automatically)."
  90     print
  91     print url
  92     print
  93     print "Waiting for you to press return"
  94
  95     # We now have a login url, open it in a web-browser
  96     webbrowser.open_new(url)
  97
  98     # Wait for input
  99     sys.stdin.readline()
 100
 101     # Now, try and retrieve a token
 102     string = SHARED_SECRET + "api_key" + API_KEY + "frob" + frob + "methodflickr.auth.getToken"
 103     hash   = md5.new(string).digest().encode("hex")
 104
 105     # Formulate the request
 106     url    = "http://api.flickr.com/services/rest/?method=flickr.auth.getToken"
 107     url   += "&api_key=" + API_KEY + "&frob=" + frob
 108     url   += "&api_sig=" + hash
 109
 110     # See if we get a token
 111     try:
 112         # Make the request and extract the frob
 113         response = urllib2.urlopen(url)
 114
 115         # Parse the XML
 116         dom = xml.dom.minidom.parse(response)
 117
 118         # get the token and user-id
 119         token = getText(dom.getElementsByTagName("token")[0].childNodes)
 120         nsid  = dom.getElementsByTagName("user")[0].getAttribute("nsid")
 121
 122         # Free the DOM
 123         dom.unlink()
 124
 125         # Return the token and userid
 126         return (nsid, token)
 127     except:
 128         raise "Login failed"
 129
 130 #
 131 # Sign an arbitrary flickr request with a token
 132 #
 133 def flickrsign(url, token):
 134     query  = urlparse.urlparse(url).query
 135     query += "&api_key=" + API_KEY + "&auth_token=" + token
 136     params = query.split('&')
 137
 138     # Create the string to hash
 139     string = SHARED_SECRET
 140
 141     # Sort the arguments alphabettically
 142     params.sort()
 143     for param in params:
 144         string += param.replace('=', '')
 145     hash   = md5.new(string).digest().encode("hex")
 146
 147     # Now, append the api_key, and the api_sig args
 148     url += "&api_key=" + API_KEY + "&auth_token=" + token + "&api_sig=" + hash
 149
 150     # Return the signed url
 151     return url
 152
 153 #
 154 # Grab the photo from the server
 155 #
 156 def getphoto(imgurl, filename):
 157     # Grab the image file
 158     response = urllib2.urlopen(imgurl)
 159     data = response.read()
 160
 161     # Save the file!
 162     fh = open(filename, "w")
 163     fh.write(data)
 164     fh.close()
 165
 166     return filename
 167
 168 #
 169 # Escape Unicode chars
 170 # http://stackoverflow.com/questions/3011569/how-do-i-convert-filenames-from-unicode-to-ascii
 171 #
 172 def unistrip(s):
 173     if isinstance(s, str):
 174         s = s.decode('utf-8')
 175     chars = []
 176     for i in s:
 177         if ord(i) > 0x7f:
 178             chars.append(u'_')
 179         else:
 180             chars.append(i)
 181     return u''.join(chars)
 182
 183 ######## Main Application ##########
 184 if __name__ == '__main__':
 185
 186     # The first, and only argument needs to be a directory
 187     try:
 188         os.chdir(sys.argv[1])
 189     except:
 190         print "usage: %s directory" % sys.argv[0]
 191         sys.exit(1)
 192
 193     # First things first, see if we have a cached user and auth-token
 194     try:
 195         cache = open("touchr.frob.cache", "r")
 196         config = cPickle.load(cache)
 197         cache.close()
 198
 199     # We don't - get a new one
 200     except:
 201         (user, token) = froblogin(getfrob(), "read")
 202         config = { "version":1 , "user":user, "token":token }
 203
 204         # Save it for future use
 205         cache = open("touchr.frob.cache", "w")
 206         cPickle.dump(config, cache)
 207         cache.close()
 208
 209     # Now, construct a query for the list of photo sets
 210     url  = "http://api.flickr.com/services/rest/?method=flickr.photosets.getList"
 211     url += "&user_id=" + config["user"]
 212     url  = flickrsign(url, config["token"])
 213
 214     # get the result
 215     response = urllib2.urlopen(url)
 216
 217     # Parse the XML
 218     dom = xml.dom.minidom.parse(response)
 219
 220     # Get the list of Sets
 221     sets =  dom.getElementsByTagName("photoset")
 222
 223     # For each set - create a url
 224     urls = []
 225     for set in sets:
 226         pid = set.getAttribute("id")
 227         dir = getText(set.getElementsByTagName("title")[0].childNodes)
 228         #dir = unicodedata.normalize('NFC', dir.decode("utf-8", "ignore")).encode('ASCII', 'ignore') # Normalize to ASCII
 229         dir = unistrip(dir)  # Normalize to ASCII, converting Unicode chars to '_'
 230
 231         # Build the list of photos
 232         url   = "http://api.flickr.com/services/rest/?method=flickr.photosets.getPhotos"
 233         url  += "&photoset_id=" + pid
 234
 235         # Append to our list of urls
 236         urls.append( (url , dir) )
 237
 238     # Free the DOM memory
 239     dom.unlink()
 240
 241     # Add the photos which are not in any set
 242     url   = "http://api.flickr.com/services/rest/?method=flickr.photos.getNotInSet"
 243     urls.append( (url, "No Set") )
 244
 245     # Add the user's Favourites
 246     url   = "http://api.flickr.com/services/rest/?method=flickr.favorites.getList"
 247     urls.append( (url, "Favourites") )
 248
 249     # Time to get the photos
 250     inodes = {}
 251     for (url , dir) in urls:
 252         # Create the directory
 253         try:
 254             os.makedirs(dir)
 255         except:
 256             pass
 257
 258         # Get 500 results per page
 259         url += "&per_page=500"
 260         pages = page = 1
 261
 262         # Get Date-Taken and Original-size URL for each result photo
 263         url  += "&extras=date_taken,url_o,url_l,url_m"
 264
 265         while page <= pages:
 266             request = url + "&page=" + str(page)
 267
 268             # Sign the url
 269             request = flickrsign(request, config["token"])
 270
 271             # Make the request
 272             response = urllib2.urlopen(request)
 273
 274             # Parse the XML
 275             dom = xml.dom.minidom.parse(response)
 276
 277             # Get the total
 278             pages = int(dom.getElementsByTagName("photo")[0].parentNode.getAttribute("pages"))
 279
 280             # Grab the photos
 281             for photo in dom.getElementsByTagName("photo"):
 282                 # Tell the user we're grabbing the file
 283                 print photo.getAttribute("title").encode("utf8") + " ... in set ... " + dir
 284
 285                 # Grab the id
 286                 photoid = photo.getAttribute("id")
 287
 288                 # Grab the taken date
 289                 taken = photo.getAttribute("datetaken")
 290                 taken = taken.replace(":","").replace("-","").replace(" ","")
 291
 292                 # Get URL to the "Original" size of the photo,
 293                 # falling back to "Large" and then "Medium" if needed
 294                 imgurl = photo.getAttribute("url_o")
 295                 imgsz = '_o';
 296                 if imgurl == "":
 297                     imgurl = photo.getAttribute("url_l")
 298                     imgsz = '_l';
 299                 if imgurl == "":
 300                     imgurl = photo.getAttribute("url_m")
 301                     imgsz = '_m';
 302
 303                 # Build the target filename
 304                 target = dir + "/" + taken + "-" + photoid + imgsz + ".jpg"
 305
 306                 # Skip files that exist
 307                 if os.access(target, os.R_OK):
 308                     inodes[photoid] = target
 309                     continue
 310
 311                 # Look it up in our dictionary of inodes first
 312                 if photoid in inodes and inodes[photoid] and os.access(inodes[photoid], os.R_OK):
 313                     # woo, we have it already, use a hard-link
 314                     os.link(inodes[photoid], target)
 315                 else:
 316                     # Grab image and save to local file
 317                     if imgurl:
 318                         inodes[photoid] = getphoto(imgurl, target)
 319                     else:
 320                         print "Failed to find URL for photo id " + photoid
 321
 322             # Move on the next page
 323             page = page + 1