Tony Duckles's Git Repositories (git.nynim.org) - flickrtouchr.git/blob - flickrtouchr.py
Deal with unicode set names
[flickrtouchr.git] / flickrtouchr.py
1 #!/usr/bin/env python
2
3 #
4 # FlickrTouchr - a simple python script to grab all your photos from flickr,
5 # dump into a directory - organised into folders by set -
6 # along with any favourites you have saved.
7 #
8 # You can then sync the photos to an iPod touch.
9 #
10 # Version: 1.2
11 #
12 # Original Author: colm - AT - allcosts.net - Colm MacCarthaigh - 2008-01-21
13 #
14 # Modified by: Dan Benjamin - http://hivelogic.com
15 #
16 # License: Apache 2.0 - http://www.apache.org/licenses/LICENSE-2.0.html
17 #
18
19 import xml.dom.minidom
20 import webbrowser
21 import urlparse
22 import urllib2
23 import unicodedata
24 import cPickle
25 import md5
26 import sys
27 import os
28
29 API_KEY = "e224418b91b4af4e8cdb0564716fa9bd"
30 SHARED_SECRET = "7cddb9c9716501a0"
31
32 #
33 # Utility functions for dealing with flickr authentication
34 #
def getText(nodelist):
    """Join the character data of every text node in nodelist.

    Nodes of any other type are skipped. The joined text is returned
    encoded as UTF-8.
    """
    pieces = [node.data for node in nodelist
              if node.nodeType == node.TEXT_NODE]
    return "".join(pieces).encode("utf-8")
41
42 #
43 # Get the frob based on our API_KEY and shared secret
44 #
def getfrob():
    """Request a new frob from the flickr.auth.getFrob REST method.

    The request is signed with the MD5 hex digest of SHARED_SECRET followed
    by the call's parameter names and values.

    Returns:
        The text of the <frob> element, as produced by getText().

    Raises:
        RuntimeError: if the request fails or the response contains no
            <frob> element.
    """
    # Create our signing string
    string = SHARED_SECRET + "api_key" + API_KEY + "methodflickr.auth.getFrob"
    signature = md5.new(string).hexdigest()

    # Formulate the request
    url = "http://api.flickr.com/services/rest/?method=flickr.auth.getFrob"
    url += "&api_key=" + API_KEY + "&api_sig=" + signature

    try:
        # Make the request and parse the XML
        response = urllib2.urlopen(url)
        dom = xml.dom.minidom.parse(response)

        try:
            # Extract the frob
            frob = getText(dom.getElementsByTagName("frob")[0].childNodes)
        finally:
            # Free the DOM even when the <frob> element is missing
            dom.unlink()

        return frob

    except Exception:
        # Raising a plain string is rejected by Python 2.6+ (TypeError), so
        # use a real exception type to carry the message.
        raise RuntimeError("Could not retrieve frob")
72
73 #
74 # Login and get a token
75 #
def froblogin(frob, perms):
    """Have the user authorise the application, then trade frob for a token.

    Builds a signed flickr auth URL, prints it, opens it in a web browser
    and blocks until a line is read from stdin. It then calls
    flickr.auth.getToken with the same frob.

    Args:
        frob: frob string, e.g. as returned by getfrob().
        perms: permission level placed in the auth URL (the main script
            passes "read").

    Returns:
        A (nsid, token) tuple: the "nsid" attribute of the <user> element
        and the text of the <token> element in the getToken response.

    Raises:
        RuntimeError: if the token request fails or the response lacks the
            expected elements.
    """
    string = SHARED_SECRET + "api_key" + API_KEY + "frob" + frob + "perms" + perms
    signature = md5.new(string).hexdigest()

    # Formulate the request
    url = "http://api.flickr.com/services/auth/?"
    url += "api_key=" + API_KEY + "&perms=" + perms
    url += "&frob=" + frob + "&api_sig=" + signature

    # Tell the user what's happening
    print("In order to allow FlickrTouchr to read your photos and favourites")
    print("you need to allow the application. Please press return when you've")
    print("granted access at the following url (which should have opened")
    print("automatically).")
    print("")
    print(url)
    print("")
    print("Waiting for you to press return")

    # We now have a login url, open it in a web-browser
    webbrowser.open_new(url)

    # Wait for input
    sys.stdin.readline()

    # Now, try and retrieve a token
    string = SHARED_SECRET + "api_key" + API_KEY + "frob" + frob + "methodflickr.auth.getToken"
    signature = md5.new(string).hexdigest()

    # Formulate the request
    url = "http://api.flickr.com/services/rest/?method=flickr.auth.getToken"
    url += "&api_key=" + API_KEY + "&frob=" + frob
    url += "&api_sig=" + signature

    # See if we get a token
    try:
        # Make the request and parse the XML
        response = urllib2.urlopen(url)
        dom = xml.dom.minidom.parse(response)

        try:
            # Get the token and user-id
            token = getText(dom.getElementsByTagName("token")[0].childNodes)
            nsid = dom.getElementsByTagName("user")[0].getAttribute("nsid")
        finally:
            # Free the DOM even when an element is missing
            dom.unlink()

        # Return the token and userid
        return (nsid, token)

    except Exception:
        # Raising a plain string is rejected by Python 2.6+ (TypeError), so
        # use a real exception type to carry the message.
        raise RuntimeError("Login failed")
129
130 #
131 # Sign an arbitrary flickr request with a token
132 #
def flickrsign(url, token):
    """Return url with api_key, auth_token and api_sig arguments appended.

    The signature is the MD5 hex digest of SHARED_SECRET followed by every
    query argument (those already in url plus api_key and auth_token),
    sorted by name, with each name and value concatenated.

    Args:
        url: request URL; the new arguments are appended with "&", so it
            should already contain a query string.
        token: auth token to sign the request with.

    Returns:
        The signed URL string.
    """
    query = urlparse.urlparse(url).query
    query += "&api_key=" + API_KEY + "&auth_token=" + token

    # Split each "name=value" argument on its first '=' only, so a value
    # that itself contains '=' is signed unmodified. Empty fragments (from
    # a leading '&') contribute nothing and are dropped.
    pairs = [param.partition("=")[::2] for param in query.split("&") if param]

    # Sort the arguments alphabetically by name (then value)
    pairs.sort()

    # Create the string to hash
    string = SHARED_SECRET + "".join([name + value for (name, value) in pairs])
    signature = md5.new(string).hexdigest()

    # Now, append the api_key, auth_token and api_sig args
    url += "&api_key=" + API_KEY + "&auth_token=" + token + "&api_sig=" + signature

    # Return the signed url
    return url
152
153 #
154 # Grab the photo from the server
155 #
def getphoto(id, token, filename):
    """Download the "Original" size of a photo and save it to filename.

    Args:
        id: flickr photo id, as a string.
        token: auth token used to sign the flickr.photos.getSizes request.
        filename: path the image data is written to.

    Returns:
        filename on success, or None when the photo could not be fetched
        (a message is printed in that case).
    """
    try:
        # Construct a request to find the sizes
        url = "http://api.flickr.com/services/rest/?method=flickr.photos.getSizes"
        url += "&photo_id=" + id

        # Sign and make the request
        response = urllib2.urlopen(flickrsign(url, token))

        # Parse the XML
        dom = xml.dom.minidom.parse(response)
        try:
            # Grab the original if it exists (it is expected to be the last
            # entry in the list of sizes)
            sizes = dom.getElementsByTagName("size")
            imgurl = None
            if sizes and sizes[-1].getAttribute("label") == "Original":
                imgurl = sizes[-1].getAttribute("source")
        finally:
            # Free the DOM memory
            dom.unlink()

        if imgurl is None:
            # Bail out explicitly rather than failing later on an unbound
            # image url.
            print("Failed to get original for photo id " + id)
            return None

        # Grab the image file
        data = urllib2.urlopen(imgurl).read()

        # Save the file! Binary mode keeps the image bytes intact on
        # platforms that translate newlines in text mode.
        fh = open(filename, "wb")
        try:
            fh.write(data)
        finally:
            fh.close()

        return filename
    except Exception:
        # Exception (not a bare except) lets Ctrl-C and sys.exit through.
        print("Failed to retrieve photo id " + id)
        return None
196
197 ######## Main Application ##########
if __name__ == '__main__':
    # Script entry point: change into the target directory, obtain (or load
    # a cached) user id and auth token, then download every photo of every
    # set, the photos in no set, and the favourites into one directory each.
    # Single-argument print(...) calls are used so the statements mean the
    # same thing under the Python 2 print statement.

    # The first, and only argument needs to be a directory
    try:
        os.chdir(sys.argv[1])
    except (IndexError, OSError):
        print("usage: %s directory" % sys.argv[0])
        sys.exit(1)

    # First things first, see if we have a cached user and auth-token.
    # NOTE(review): cPickle.load can execute arbitrary code from the file it
    # reads; the cache lives in the download directory, so only run this
    # against directories you trust.
    try:
        cache = open("touchr.frob.cache", "r")
        try:
            config = cPickle.load(cache)
        finally:
            cache.close()

    # We don't - get a new one
    except Exception:
        (user, token) = froblogin(getfrob(), "read")
        config = { "version":1 , "user":user, "token":token }

        # Save it for future use
        cache = open("touchr.frob.cache", "w")
        try:
            cPickle.dump(config, cache)
        finally:
            cache.close()

    # Now, construct a query for the list of photo sets
    url = "http://api.flickr.com/services/rest/?method=flickr.photosets.getList"
    url += "&user_id=" + config["user"]
    url = flickrsign(url, config["token"])

    # get the result
    response = urllib2.urlopen(url)

    # Parse the XML
    dom = xml.dom.minidom.parse(response)

    # Get the list of Sets
    photosets = dom.getElementsByTagName("photoset")

    # For each set - create a url
    urls = []
    for photoset in photosets:
        pid = photoset.getAttribute("id")
        title = getText(photoset.getElementsByTagName("title")[0].childNodes)

        # Normalize to ASCII
        dirname = unicodedata.normalize('NFKD', title.decode("utf-8", "ignore")).encode('ASCII', 'ignore')

        # A title made up only of non-ASCII characters normalizes to an
        # empty string, which would turn the targets below into
        # "/<id>.jpg". Fall back to the set id in that case.
        if not dirname:
            dirname = pid.encode('ASCII', 'ignore')

        # Build the list of photos
        url = "http://api.flickr.com/services/rest/?method=flickr.photosets.getPhotos"
        url += "&photoset_id=" + pid

        # Append to our list of urls
        urls.append( (url , dirname) )

    # Free the DOM memory
    dom.unlink()

    # Add the photos which are not in any set
    url = "http://api.flickr.com/services/rest/?method=flickr.photos.getNotInSet"
    urls.append( (url, "No Set") )

    # Add the user's Favourites
    url = "http://api.flickr.com/services/rest/?method=flickr.favorites.getList"
    urls.append( (url, "Favourites") )

    # Time to get the photos. inodes maps a photo id to a path it has
    # already been saved under, so later sets can hard-link to it.
    inodes = {}
    for (url , dirname) in urls:
        # Create the directory
        try:
            os.makedirs(dirname)
        except OSError:
            # Usually the directory already exists; any other failure shows
            # up when the photos are written.
            pass

        # Get 500 results per page
        url += "&per_page=500"
        pages = page = 1

        while page <= pages:
            request = url + "&page=" + str(page)

            # Sign the url
            request = flickrsign(request, config["token"])

            # Make the request
            response = urllib2.urlopen(request)

            # Parse the XML
            dom = xml.dom.minidom.parse(response)

            try:
                photos = dom.getElementsByTagName("photo")

                # An empty set / favourites list has no <photo> element to
                # read the page count from - nothing to download here.
                if not photos:
                    break

                # Get the total
                pages = int(photos[0].parentNode.getAttribute("pages"))

                # Grab the photos
                for photo in photos:
                    # Tell the user we're grabbing the file
                    print(photo.getAttribute("title").encode("utf8") + " ... in set ... " + dirname)

                    # Grab the id
                    photoid = photo.getAttribute("id")

                    # The target
                    target = dirname + "/" + photoid + ".jpg"

                    # Skip files that exist
                    if os.access(target, os.R_OK):
                        continue

                    # Look it up in our dictionary of inodes first
                    if photoid in inodes and os.access(inodes[photoid], os.R_OK):
                        # woo, we have it already, use a hard-link
                        os.link(inodes[photoid], target)
                    else:
                        saved = getphoto(photoid, config["token"], target)
                        # getphoto returns None on failure; recording that
                        # would make the os.access() check above blow up
                        # the next time this photo id is seen.
                        if saved is not None:
                            inodes[photoid] = saved
            finally:
                # Free the DOM memory for this page
                dom.unlink()

            # Move on to the next page
            page = page + 1