]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/svnclient.py
svn2svn v1.5.0
[svn2svn.git] / svn2svn / svnclient.py
1 """ SVN client functions """
2
3 from shell import run_svn
4 from errors import EmptySVNLog
5
6 import os
7 import time
8 import calendar
9 import operator
10 import urllib
11
12 try:
13 from xml.etree import cElementTree as ET
14 except ImportError:
15 try:
16 from xml.etree import ElementTree as ET
17 except ImportError:
18 try:
19 import cElementTree as ET
20 except ImportError:
21 from elementtree import ElementTree as ET
22
# 256-entry identity table, used by the byte-string form of str.translate().
_identity_table = "".join(map(chr, range(256)))
# Every control character below 0x20 except tab (0x9), LF (0xA) and CR (0xD).
_forbidden_xml_chars = "".join(
    set(map(chr, range(32))) - set('\x09\x0A\x0D')
)
# Code point -> None mapping, used by the single-argument (mapping) form of
# translate() that unicode strings require.
_forbidden_xml_ordinals = dict.fromkeys(map(ord, _forbidden_xml_chars))


def strip_forbidden_xml_chars(xml_string):
    """
    Given an XML string, strips forbidden characters as per the XML spec.
    (these are all control characters except 0x9, 0xA and 0xD).

    Accepts both byte strings and unicode strings; the returned string has the
    same type as the input.
    """
    try:
        # Byte strings (Python 2 "str") support translate(table, deletechars).
        return xml_string.translate(_identity_table, _forbidden_xml_chars)
    except TypeError:
        # Unicode strings only accept a single mapping argument; mapping a
        # code point to None deletes it.
        return xml_string.translate(_forbidden_xml_ordinals)
35
def safe_path(path, rev_number=None):
    """
    Return 'path' in a form suitable for passing as an SVN command-line arg.

    URLs (anything containing "://") are URL-escaped; local working-copy paths
    are left untouched. When 'rev_number' is given it is appended as a peg
    revision ("path@rev"). Otherwise, a path which already contains an "@"
    gets a trailing "@" so that the earlier "@" is not read as a peg revision.
    """
    looks_like_url = "://" in path
    if looks_like_url:
        path = urllib.quote(path, ":/")
    if rev_number is None:
        if "@" in path:
            # Trailing "@" "escapes" the "@" already present in the path
            return path + "@"
        return path
    # Explicit peg revision
    return path + "@" + str(rev_number)
50
def svn_date_to_timestamp(svn_date):
    """
    Convert an SVN date string, as found in the XML output, to a timestamp
    (seconds since the epoch).

    Everything from the first "." onwards (the fractional seconds and the
    timezone suffix) is discarded, and the remainder is interpreted as UTC.
    """
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    whole_seconds = svn_date.partition('.')[0]
    parsed = time.strptime(whole_seconds, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(parsed)
62
def parse_svn_info_xml(xml_string):
    """
    Extract the interesting fields from the XML output of an "svn info"
    command and return them as a dict.

    The 'last_changed_author' key is only present when the XML contains a
    commit author.
    """
    root = ET.fromstring(strip_forbidden_xml_chars(xml_string))
    entry = root.find('.//entry')
    info = {
        'url': entry.find('url').text,
        'kind': entry.get('kind'),
        'revision': int(entry.get('revision')),
        'repos_url': root.find('.//repository/root').text,
        'repos_uuid': root.find('.//repository/uuid').text,
        'last_changed_rev': int(root.find('.//commit').get('revision')),
    }
    author = root.find('.//commit/author')
    if author is not None:
        info['last_changed_author'] = author.text
    commit_date = root.find('.//commit/date').text
    info['last_changed_date'] = svn_date_to_timestamp(commit_date)
    return info
83
def get_kind(svn_repos_url, svn_path, svn_rev, action, paths):
    """
    Calculate the "kind"-type of a given URL in the SVN repo.

    svn_repos_url -- repository root URL; the path is appended to it.
    svn_path      -- repository path of the item.
    svn_rev       -- revision in which 'action' happened.
    action        -- log action code; 'D' (delete) gets special handling.
    paths         -- changed-path dicts for the same log entry, with the keys
                     'path', 'kind', 'copyfrom_path' and 'copyfrom_revision'.

    Returns the 'kind' value reported by get_svn_info().
    """
    # By default, just do a simple "svn info" based on passed-in params.
    info_path = svn_path
    info_rev = svn_rev
    if action == 'D':
        # For deletions, we can't do an "svn info" at this revision.
        # Need to trace ancestry backwards.
        # Collect any copy-from'd directories in this log entry that we're a child of.
        copied_parents = [
            p for p in paths
            if p['kind'] == 'dir' and p['copyfrom_revision']
            and svn_path.startswith(p['path'] + "/")
        ]
        if copied_parents:
            # Use the nearest copy-from'd parent. Every candidate is a prefix
            # of svn_path, so the nearest one is simply the longest.
            nearest = max(copied_parents, key=lambda p: len(p['path']))
            # Swap only the leading parent prefix for its copy-from source; a
            # plain str.replace() would also rewrite later occurrences of the
            # same text deeper in the path.
            info_path = nearest['copyfrom_path'] + svn_path[len(nearest['path']):]
            info_rev = nearest['copyfrom_revision']
        else:
            # If no parent copy-from's, then we should be able to check this
            # path in the preceding revision.
            info_rev -= 1
    info = get_svn_info(svn_repos_url + info_path, info_rev)
    return info['kind']
113
def parse_svn_log_xml(xml_string):
    """
    Turn the XML output of an "svn log" command into a list of dicts, one per
    log entry (changeset).

    Each dict has the keys 'revision', 'author', 'date_raw', 'date',
    'message', 'changed_paths' and 'revprops'.
    """
    root = ET.fromstring(strip_forbidden_xml_chars(xml_string))
    log_entries = []
    for logentry in root.findall('logentry'):
        # Some revisions don't have authors, most notably the first revision
        # in a repository.
        # logentry nodes targeting directories protected by path-based
        # authentication have no child nodes at all. An entry is still
        # returned in that case; it simply carries no changed paths.
        author_el = logentry.find('author')
        date_el = logentry.find('date')
        msg_el = logentry.find('msg')

        record = {'revision': int(logentry.get('revision'))}

        if author_el is not None and author_el.text:
            record['author'] = author_el.text
        else:
            record['author'] = "No author"

        if date_el is None:
            record['date_raw'] = None
            record['date'] = None
        else:
            record['date_raw'] = date_el.text
            record['date'] = svn_date_to_timestamp(date_el.text)

        if msg_el is None or not msg_el.text:
            record['message'] = ""
        else:
            # Normalize every flavour of line ending to "\n"
            message = msg_el.text.replace('\r\n', '\n')
            message = message.replace('\n\r', '\n')
            record['message'] = message.replace('\r', '\n')

        changed = []
        for path_el in logentry.findall('.//paths/path'):
            raw_copyfrom_rev = path_el.get('copyfrom-rev')
            changed.append({
                'path': path_el.text,
                'kind': path_el.get('kind'),
                'action': path_el.get('action'),
                'copyfrom_path': path_el.get('copyfrom-path'),
                'copyfrom_revision': int(raw_copyfrom_rev) if raw_copyfrom_rev else raw_copyfrom_rev,
            })
        # Sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
        # can process actions in depth-first order.
        changed.sort(key=operator.itemgetter('path'))
        record['changed_paths'] = changed

        record['revprops'] = [
            {'name': prop_el.get('name'), 'value': prop_el.text}
            for prop_el in logentry.findall('.//revprops/property')
        ]
        log_entries.append(record)
    return log_entries
159
def parse_svn_status_xml(xml_string, base_dir=None, ignore_externals=False):
    """
    Turn the XML output of an "svn status" command into a list of dicts, one
    per status entry.

    When an entry's path starts with 'base_dir', that prefix (and any leading
    path separators left behind) is stripped from the reported path. Entries
    whose status is 'external' are skipped when 'ignore_externals' is true.
    """
    if base_dir:
        base_dir = os.path.normcase(base_dir)
    root = ET.fromstring(strip_forbidden_xml_chars(xml_string))
    results = []
    for entry in root.findall('.//entry'):
        rel_path = entry.get('path')
        if base_dir is not None and os.path.normcase(rel_path).startswith(base_dir):
            rel_path = rel_path[len(base_dir):].lstrip('/\\')
        wc_status = entry.find('wc-status')
        item = wc_status.get('item')
        if ignore_externals and item == 'external':
            continue
        revision = wc_status.get('revision')
        # Entries without a revision attribute are reported as unversioned
        if item == 'external':
            entry_type = 'external'
        elif revision is None:
            entry_type = 'unversioned'
        else:
            entry_type = 'normal'
        results.append({
            'path': rel_path,
            'type': entry_type,
            'status': item,
            'revision': revision,
            'props': wc_status.get('props'),
            'copied': wc_status.get('copied'),
        })
    return results
194
def get_svn_rev(svn_url_or_wc, rev_number):
    """
    Resolve the given SVN revision pattern to a discrete rev #, by running
    "svn info" against it and returning the revision that comes back.
    """
    target = safe_path(svn_url_or_wc, rev_number)
    info_xml = run_svn(['info', '--xml', '-r', rev_number, target], fail_if_stderr=True)
    return parse_svn_info_xml(info_xml)['revision']
202
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Run "svn info" on the given URL or working copy, optionally at a specific
    revision number.
    Returns a dict as created by parse_svn_info_xml().
    """
    cmd = ['info', '--xml']
    if rev_number is not None:
        cmd.extend(["-r", rev_number])
    cmd.append(safe_path(svn_url_or_wc, rev_number))
    return parse_svn_info_xml(run_svn(cmd, fail_if_stderr=True))
215
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Quietly check out the given URL into 'checkout_dir', at an optional
    revision number. Returns whatever run_svn() returns.
    """
    cmd = ['checkout', '-q']
    if rev_number is not None:
        cmd.extend(['-r', rev_number])
    cmd.extend([safe_path(svn_url, rev_number), checkout_dir])
    return run_svn(cmd)
225
def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Run "svn log" for the revision range rev_start:rev_end, fetching at most
    'limit' entries, and return the parsed entries (see parse_svn_log_xml()).

    The larger of the two revisions is used as the peg revision of the target.
    """
    optional_flags = (
        (stop_on_copy, '--stop-on-copy'),
        (get_changed_paths, '-v'),
        (get_revprops, '--with-all-revprops'),
    )
    cmd = ['log', '--xml']
    cmd.extend(flag for enabled, flag in optional_flags if enabled)
    cmd.extend(['-r', '%s:%s' % (rev_start, rev_end)])
    cmd.extend(['--limit', str(limit)])
    cmd.append(safe_path(svn_url_or_wc, max(rev_start, rev_end)))
    return parse_svn_log_xml(run_svn(cmd))
241
def get_svn_status(svn_wc, quiet=False, no_recursive=False):
    """
    Run "svn status" on the given working copy and return the parsed entries
    (see parse_svn_status_xml()). Externals are always ignored.
    """
    # Canonicalize the path, so that the prefix-stripping done by the parser
    # lines up with the paths that svn reports.
    wc_root = os.path.abspath(svn_wc)
    cmd = ['status', '--xml', '--ignore-externals']
    cmd.append('-q' if quiet else '-v')
    if no_recursive:
        cmd.append('-N')
    cmd.append(safe_path(wc_root))
    status_xml = run_svn(cmd)
    return parse_svn_status_xml(status_xml, wc_root, ignore_externals=True)
257
def get_svn_versioned_files(svn_wc):
    """
    Return the paths of all versioned entries in the SVN working copy, i.e.
    the status entries of type 'normal' which have a non-empty path.
    """
    return [
        entry['path']
        for entry in get_svn_status(svn_wc)
        if entry['path'] and entry['type'] == 'normal'
    ]
267
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Return the first SVN log entry in the requested revision range.
    Raises EmptySVNLog when the range contains no log entries.
    """
    entries = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths, get_revprops)
    if not entries:
        raise EmptySVNLog("No SVN log for %s between revisions %s and %s" %
                          (svn_url, rev_start, rev_end))
    return entries[0]
277
def get_first_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True, get_changed_paths=True):
    """
    Get the first log entry after (or at) 'rev_start', looking no further
    than 'rev_end', in an SVN branch.

    NOTE: to know whether a branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    return get_one_svn_log_entry(
        svn_url,
        rev_start,
        rev_end,
        stop_on_copy=stop_on_copy,
        get_changed_paths=get_changed_paths,
    )
289
def get_last_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True, get_changed_paths=True):
    """
    Get the last log entry before (or at) 'rev_end', looking no further back
    than 'rev_start', in an SVN branch.
    """
    # Query the range in reverse (rev_end down to rev_start), so that the
    # single entry fetched is the newest one.
    return get_one_svn_log_entry(
        svn_url,
        rev_end,
        rev_start,
        stop_on_copy=stop_on_copy,
        get_changed_paths=get_changed_paths,
    )
297
298
# Chunked log fetching parameters, used by iter_svn_log_entries():
# a request faster than this threshold (seconds) doubles the chunk length, and
# one slower than twice this threshold halves it...
log_duration_threshold = 10.0
# ...while the chunk length always stays within these bounds.
log_min_chunk_length = 10
log_max_chunk_length = 10000

def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_changed_paths=True, get_revprops=False, ancestors=None):
    """
    Iterate over SVN log entries between first_rev and last_rev.

    This function features chunked log fetching so that it isn't too nasty
    to the SVN server if many entries are requested.

    first_rev may be an int or a numeric string. last_rev may be an int, a
    numeric string, or "HEAD" (which is resolved through "svn info").
    Each yielded entry is a dict as built by parse_svn_log_xml(), with an
    extra 'url' key holding the URL that the entry was fetched from.

    NOTE: If *not* passing in the explicit (pre-calculated) 'ancestors' list,
    this chunked log fetching *ONLY* works correctly on paths which
    are known to have existed unbroken in the SVN repository, e.g. /trunk.
    Chunked fetching breaks down if a path existed earlier, then was
    deleted, and later was re-created. For example, if path was created in r5,
    then deleted in r1000, and then later re-created in r5000...
      svn log --stop-on-copy --limit 1 -r 1:50 "path/to/file"
        --> would yield r5, i.e. the _initial_ creation
      svn log --stop-on-copy --limit 1 -r 1:HEAD "path/to/file"
        --> would yield r5000, i.e. the _re-creation_
    Use run/svn2svn.py:find_svn_ancestors() to pass in the 'ancestors' array
    so that we can correctly re-trace ancestry here.
    """
    info = get_svn_info(svn_url)
    svn_repos_url = info['repos_url']
    # Normalize both revisions to ints once, so that every comparison below is
    # numeric (a revision passed as a string would otherwise never compare
    # correctly against the int revisions parsed from the log).
    if last_rev == "HEAD":
        last_rev = info['revision']
    else:
        last_rev = int(last_rev)
    first_rev = int(first_rev)
    if first_rev == 1:
        # Skip ahead to the first revision which actually has a log entry
        start_log = get_first_svn_log_entry(svn_url, first_rev, last_rev, stop_on_copy=stop_on_copy, get_changed_paths=False)
        if start_log['revision'] > first_rev:
            first_rev = start_log['revision']
    cur_url = svn_url
    cur_rev = first_rev
    cur_anc_idx = None
    cur_anc_end_rev = None
    if ancestors:
        # Crawl ancestry, from oldest to newest
        for idx in range(len(ancestors)-1, -1, -1): # [n-1,...,0]
            cur_url = svn_repos_url+ancestors[idx]['copyfrom_path']
            cur_anc_idx = idx
            if first_rev < int(ancestors[idx]['copyfrom_rev']):
                cur_anc_end_rev = int(ancestors[idx]['copyfrom_rev'])
                break
        if cur_anc_end_rev is None:
            # first_rev is past every copy-from: start on the final path
            cur_anc_idx = -1
            cur_url = svn_repos_url+ancestors[0]['path']
    chunk_length = log_min_chunk_length
    while cur_rev <= last_rev:
        if cur_anc_end_rev and cur_rev >= cur_anc_end_rev:
            # Reached the end of the current ancestor: jump to the revision of
            # the copy and continue on the next (newer) ancestor.
            cur_rev = int(ancestors[cur_anc_idx]['revision'])
            cur_anc_idx -= 1
            if cur_anc_idx >= 0:
                idx = cur_anc_idx
                cur_url = svn_repos_url+ancestors[idx]['copyfrom_path']
                cur_anc_end_rev = int(ancestors[idx]['copyfrom_rev'])
            else:
                cur_url = svn_repos_url+ancestors[0]['path']
                cur_anc_end_rev = None
        start_t = time.time()
        stop_rev = min(last_rev, cur_rev + chunk_length)
        stop_rev = min(stop_rev, cur_anc_end_rev) if cur_anc_end_rev else stop_rev
        entries = run_svn_log(cur_url, cur_rev, stop_rev, chunk_length,
                              stop_on_copy, get_changed_paths, get_revprops)
        duration = time.time() - start_t
        if entries:
            for e in entries:
                if e['revision'] > last_rev:
                    break
                # Embed the current URL in the yielded dict, for ancestor cases where
                # we might have followed a copy-from to some non-original URL.
                e['url'] = cur_url
                yield e
                if e['revision'] >= last_rev:
                    break
            # Resume right after the last entry seen
            cur_rev = int(e['revision'])+1
        else:
            # Nothing in this chunk: resume right after it
            cur_rev = int(stop_rev)+1
        # Adapt chunk length based on measured request duration
        if duration < log_duration_threshold:
            chunk_length = min(log_max_chunk_length, int(chunk_length * 2.0))
        elif duration > log_duration_threshold * 2:
            chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
391
392
# Cached result of get_svn_client_version(); None until the first call.
_svn_client_version = None

def get_svn_client_version():
    """
    Returns the SVN client version as a tuple of ints.

    The output of "svn --version -q" is split on "." and only the components
    made up purely of digits are kept; anything else is silently dropped.
    The result is computed once and cached at module level.
    """
    global _svn_client_version
    if _svn_client_version is not None:
        return _svn_client_version
    version_text = run_svn(['--version', '-q']).strip()
    _svn_client_version = tuple(
        int(part) for part in version_text.split('.') if part.isdigit()
    )
    return _svn_client_version
408
409
def parse_svn_propget_xml(xml_string):
    """
    Parse the XML output from an "svn propget" command and extract useful
    information as a dict with the keys 'name' and 'value'.

    When the output contains no <property> element, 'name' is None and
    'value' is the empty string. Line endings in the value are normalized
    to "\\n".
    """
    xml_string = strip_forbidden_xml_chars(xml_string)
    tree = ET.fromstring(xml_string)
    prop = tree.find('.//property')
    if prop is None:
        # No property element at all: report an empty value instead of
        # failing on the missing element.
        return {'name': None, 'value': ""}
    value = prop.text or ""
    value = value.replace('\r\n', '\n').replace('\n\r', '\n').replace('\r', '\n')
    return {'name': prop.get('name'), 'value': value}
422
def parse_svn_proplist_xml(xml_string):
    """
    Extract the list of property names from the XML output of an
    "svn proplist" command.
    """
    root = ET.fromstring(strip_forbidden_xml_chars(xml_string))
    return [prop.get('name') for prop in root.findall('.//property')]
434
def get_prop_value(svn_url_or_wc, prop_name, rev_number=None):
    """
    Fetch a single versioned property of the given path via "svn propget".
    Returns a dict as created by parse_svn_propget_xml().
    """
    cmd = ['propget', '--xml']
    if rev_number:
        cmd.extend(['-r', rev_number])
    cmd.extend([prop_name, safe_path(svn_url_or_wc, rev_number)])
    return parse_svn_propget_xml(run_svn(cmd))
445
def get_all_props(svn_url_or_wc, rev_number=None):
    """
    Fetch all versioned properties of the given path, as a dict mapping each
    property name to its value.

    The names come from "svn proplist"; each value is then fetched
    individually through get_prop_value().
    """
    cmd = ['proplist', '--xml']
    if rev_number:
        cmd.extend(['-r', rev_number])
    cmd.append(safe_path(svn_url_or_wc, rev_number))
    prop_names = parse_svn_proplist_xml(run_svn(cmd))
    props = {}
    for prop_name in prop_names:
        prop = get_prop_value(svn_url_or_wc, prop_name, rev_number)
        props[prop['name']] = prop['value']
    return props