]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/svnclient.py
svn2svn v1.6.0
[svn2svn.git] / svn2svn / svnclient.py
1 """ SVN client functions """
2
3 from shell import run_svn
4 from errors import EmptySVNLog
5
6 import os
7 import time
8 import calendar
9 import operator
10 import urllib
11
12 try:
13 from xml.etree import cElementTree as ET
14 except ImportError:
15 try:
16 from xml.etree import ElementTree as ET
17 except ImportError:
18 try:
19 import cElementTree as ET
20 except ImportError:
21 from elementtree import ElementTree as ET
22
23 _identity_table = "".join(map(chr, range(256)))
24 _forbidden_xml_chars = "".join(
25 set(map(chr, range(32))) - set('\x09\x0A\x0D')
26 )
27
28
29 def _strip_forbidden_xml_chars(xml_string):
30 """
31 Given an XML string, strips forbidden characters as per the XML spec.
32 (these are all control characters except 0x9, 0xA and 0xD).
33 """
34 return xml_string.translate(_identity_table, _forbidden_xml_chars)
35
def safe_path(path, rev_number=None):
    """
    Build a path to pass as a SVN command-line arg.

    URLs (anything containing "://") are URL-escaped, keeping ":" and "/"
    intact; local working-copy paths are passed through untouched.
    If 'rev_number' is given it is appended as a peg revision ("path@REV").
    Otherwise, a path that already contains an "@" gets a trailing "@" so
    that the earlier "@" is not taken for a peg-revision separator.
    """
    is_url = "://" in path
    escaped = urllib.quote(path, ":/") if is_url else path
    if rev_number is not None:
        # Explicit peg revision.
        return escaped + "@" + str(rev_number)
    if "@" in escaped:
        # "Escape" the embedded "@" with an empty trailing peg revision.
        return escaped + "@"
    return escaped
50
51 def _svn_date_to_timestamp(svn_date):
52 """
53 Parse an SVN date as read from the XML output and return the corresponding
54 timestamp.
55 """
56 # Strip microseconds and timezone (always UTC, hopefully)
57 # XXX there are various ISO datetime parsing routines out there,
58 # cf. http://seehuhn.de/comp/pdate
59 date = svn_date.split('.', 2)[0]
60 time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
61 return calendar.timegm(time_tuple)
62
def _parse_svn_info_xml(xml_string):
    """
    Parse the XML output from an "svn info" command and extract useful information
    as a dict.

    Keys: 'url', 'kind', 'revision', 'repos_url', 'repos_uuid',
    'last_changed_rev', 'last_changed_date' and, only when the commit has an
    <author> element, 'last_changed_author'.
    """
    root = ET.fromstring(_strip_forbidden_xml_chars(xml_string))
    entry_node = root.find('.//entry')
    result = {
        'url': entry_node.find('url').text,
        'kind': entry_node.get('kind'),
        'revision': int(entry_node.get('revision')),
        'repos_url': root.find('.//repository/root').text,
        'repos_uuid': root.find('.//repository/uuid').text,
        'last_changed_rev': int(root.find('.//commit').get('revision')),
    }
    author_node = root.find('.//commit/author')
    if author_node is not None:
        result['last_changed_author'] = author_node.text
    date_text = root.find('.//commit/date').text
    result['last_changed_date'] = _svn_date_to_timestamp(date_text)
    # URL-decode "url" and "repos_url" values, since all paths passed
    # to run_svn() should be filtered through safe_path() and we don't
    # want to *double* URL-encode paths which are constructed using these values.
    for key in ('url', 'repos_url'):
        result[key] = urllib.unquote(result[key])
    return result
88
def get_kind(svn_repos_url, svn_path, svn_rev, action, paths):
    """
    Calculate the "kind"-type of a given URL in the SVN repo.

    svn_repos_url -- repository root URL, prepended to the path for "svn info"
    svn_path      -- path inside the repository
    svn_rev       -- revision in which 'action' happened
    action        -- log action letter; only 'D' (delete) gets special handling
    paths         -- changed-path dicts of the same log entry; the keys read
                     here are 'path', 'kind', 'copyfrom_path' and
                     'copyfrom_revision'

    Returns the 'kind' value reported by info() for the resolved location.
    """
    # By default, just do a simple "svn info" based on passed-in params.
    info_path = svn_path
    info_rev = svn_rev
    if action == 'D':
        # For deletions, we can't do an "svn info" at this revision.
        # Need to trace ancestry backwards.
        # Collect any copy-from'd directories in this log entry that we're a child of.
        copied_parents = [
            p for p in paths
            if p['kind'] == 'dir' and p['copyfrom_revision']
            and svn_path.startswith(p['path'] + "/")
        ]
        if copied_parents:
            # Use the nearest copy-from'd parent. Every candidate is a prefix
            # of svn_path, so the nearest one is simply the longest.
            nearest = max(copied_parents, key=lambda p: len(p['path']))
            # Swap only the leading parent prefix for its copy-from source;
            # a plain str.replace() would also rewrite any later occurrence
            # of the same text further down the path.
            info_path = nearest['copyfrom_path'] + svn_path[len(nearest['path']):]
            info_rev = nearest['copyfrom_revision']
        else:
            # If no parent copy-from's, then we should be able to check this path in
            # the preceding revision.
            info_rev -= 1
    svn_info = info(svn_repos_url + info_path, info_rev)
    return svn_info['kind']
118
def _parse_svn_log_xml(xml_string):
    """
    Parse the XML output from an "svn log" command and extract useful information
    as a list of dicts (one per log changeset).

    Each dict holds 'revision', 'author', 'date_raw', 'date', 'message',
    'changed_paths' (sorted by path) and 'revprops'.
    """
    root = ET.fromstring(_strip_forbidden_xml_chars(xml_string))
    log_entries = []
    for node in root.findall('logentry'):
        # Some revisions don't have authors, most notably the first revision
        # in a repository.
        # logentry nodes targeting directories protected by path-based
        # authentication have no child nodes at all. We still return an entry
        # in that case; as it has no path entries, no further processing
        # will be made.
        author_node = node.find('author')
        date_node = node.find('date')
        msg_node = node.find('msg')

        author_text = author_node.text if author_node is not None else None
        msg_text = msg_node.text if msg_node is not None else None
        if msg_text:
            # Normalize all line-ending flavours to "\n".
            message = msg_text.replace('\r\n', '\n').replace('\n\r', '\n').replace('\r', '\n')
        else:
            message = ""
        if date_node is not None:
            date_raw = date_node.text
            date_stamp = _svn_date_to_timestamp(date_node.text)
        else:
            date_raw = None
            date_stamp = None

        changed = []
        for path_node in node.findall('.//paths/path'):
            raw_copyfrom_rev = path_node.get('copyfrom-rev')
            changed.append({
                'path': path_node.text,
                'kind': path_node.get('kind'),
                'action': path_node.get('action'),
                'copyfrom_path': path_node.get('copyfrom-path'),
                'copyfrom_revision': int(raw_copyfrom_rev) if raw_copyfrom_rev else raw_copyfrom_rev,
            })
        # Sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
        # can process actions in depth-first order.
        changed.sort(key=operator.itemgetter('path'))

        revprops = [{ 'name': prop_node.get('name'), 'value': prop_node.text }
                    for prop_node in node.findall('.//revprops/property')]

        log_entries.append({
            'revision': int(node.get('revision')),
            'author': author_text or "No author",
            'date_raw': date_raw,
            'date': date_stamp,
            'message': message,
            'changed_paths': changed,
            'revprops': revprops,
        })
    return log_entries
164
def _parse_svn_status_xml(xml_string, base_dir=None, ignore_externals=False):
    """
    Parse the XML output from an "svn status" command and extract useful info
    as a list of dicts (one per status entry).

    When 'base_dir' is given, entry paths starting with it (compared after
    os.path.normcase) are returned relative to it. Entries whose status item
    is 'external' are skipped when 'ignore_externals' is true.
    Each dict holds 'path', 'type' ('external', 'normal' or 'unversioned'),
    'status', 'revision', 'props' and 'copied'.
    """
    if base_dir:
        base_dir = os.path.normcase(base_dir)
    root = ET.fromstring(_strip_forbidden_xml_chars(xml_string))
    results = []
    for node in root.findall('.//entry'):
        entry_path = node.get('path')
        if base_dir is not None and os.path.normcase(entry_path).startswith(base_dir):
            # Make the path relative to base_dir, dropping the leading separator.
            entry_path = entry_path[len(base_dir):].lstrip('/\\')
        wc_status = node.find('wc-status')
        item = wc_status.get('item')
        if ignore_externals and item == 'external':
            continue
        revision = wc_status.get('revision')
        if item == 'external':
            entry_type = 'external'
        elif revision is not None:
            entry_type = 'normal'
        else:
            # No revision attribute: the item is not under version control.
            entry_type = 'unversioned'
        results.append({
            'path': entry_path,
            'type': entry_type,
            'status': item,
            'revision': revision,
            'props': wc_status.get('props'),
            'copied': wc_status.get('copied'),
        })
    return results
199
def get_rev(svn_url_or_wc, rev_number):
    """
    Evaluate a given SVN revision pattern, to map it to a discrete rev #.

    Runs "svn info --xml" with 'rev_number' as both the operative (-r) and
    the peg revision, and returns the integer 'revision' of the result.
    """
    target = safe_path(svn_url_or_wc, rev_number)
    svn_args = ['info', '--xml', '-r', rev_number, target]
    parsed = _parse_svn_info_xml(run_svn(svn_args, fail_if_stderr=True))
    return parsed['revision']
207
def info(svn_url_or_wc, rev_number=None):
    """
    Get SVN information for the given URL or working copy, with an optionally
    specified revision number.
    Returns a dict as created by _parse_svn_info_xml().
    """
    # Only pass "-r" when a revision was explicitly requested.
    rev_args = [] if rev_number is None else ["-r", rev_number]
    target = safe_path(svn_url_or_wc, rev_number)
    xml_string = run_svn(['info', '--xml'] + rev_args + [target], fail_if_stderr=True)
    return _parse_svn_info_xml(xml_string)
220
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Checkout the given URL at an optional revision number.

    Runs a quiet "svn checkout" into 'checkout_dir' and returns whatever
    run_svn() returns.
    """
    rev_args = [] if rev_number is None else ['-r', rev_number]
    source = safe_path(svn_url, rev_number)
    return run_svn(['checkout', '-q'] + rev_args + [source, checkout_dir])
230
def _newest_log_rev(rev_start, rev_end):
    """
    Pick the newer of two log-range revisions, for use as a peg revision.
    """
    try:
        # Compare numerically, so that numeric strings ("9" vs "10") are
        # ordered as revisions rather than as text.
        return max(int(rev_start), int(rev_end))
    except (TypeError, ValueError):
        # At least one bound is symbolic (e.g. "HEAD"): a symbolic revision
        # always wins over a numeric one.
        symbolic = [rev for rev in (rev_start, rev_end) if not str(rev).isdigit()]
        return max(symbolic)


def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Fetch up to 'limit' SVN log entries between the given revisions.

    rev_start / rev_end -- bounds of the "-r START:END" range; revision
                           numbers or symbolic names such as "HEAD"
    stop_on_copy        -- pass "--stop-on-copy"
    get_changed_paths   -- pass "-v" so changed paths are included
    get_revprops        -- pass "--with-all-revprops"

    The target is pegged at the newer of the two bounds.
    Returns the list of dicts built by _parse_svn_log_xml().
    """
    args = ['log', '--xml']
    if stop_on_copy:
        args += ['--stop-on-copy']
    if get_changed_paths:
        args += ['-v']
    if get_revprops:
        args += ['--with-all-revprops']
    args += ['-r', '%s:%s' % (rev_start, rev_end)]
    peg_rev = _newest_log_rev(rev_start, rev_end)
    args += ['--limit', str(limit), safe_path(svn_url_or_wc, peg_rev)]
    xml_string = run_svn(args)
    return _parse_svn_log_xml(xml_string)
246
def status(svn_wc, quiet=False, non_recursive=False):
    """
    Get SVN status information about the given working copy.

    Externals are always ignored. 'quiet' selects "-q" instead of "-v";
    'non_recursive' adds "-N". Returns the list of dicts built by
    _parse_svn_status_xml(), with paths relative to the working copy.
    """
    # Ensure proper stripping by canonicalizing the path
    svn_wc = os.path.abspath(svn_wc)
    svn_args = ['status', '--xml', '--ignore-externals', '-q' if quiet else '-v']
    if non_recursive:
        svn_args.append('-N')
    svn_args.append(safe_path(svn_wc))
    xml_string = run_svn(svn_args)
    return _parse_svn_status_xml(xml_string, svn_wc, ignore_externals=True)
262
def get_svn_versioned_files(svn_wc):
    """
    Get the list of versioned files in the SVN working copy.

    Returns the non-empty paths of all status() entries of type 'normal'.
    """
    return [entry['path'] for entry in status(svn_wc)
            if entry['path'] and entry['type'] == 'normal']
272
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Get the first SVN log entry in the requested revision range.

    Raises EmptySVNLog when the range yields no log entry at all.
    """
    entries = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths, get_revprops)
    if not entries:
        raise EmptySVNLog("No SVN log for %s between revisions %s and %s" %
                          (svn_url, rev_start, rev_end))
    return entries[0]
282
def get_first_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True, get_changed_paths=True):
    """
    Get the first log entry after (or at) 'rev_start' in an SVN branch,
    looking no further than 'rev_end'.

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    # Ascending range: the single entry returned is the oldest one.
    return get_one_svn_log_entry(
        svn_url, rev_start, rev_end,
        stop_on_copy=stop_on_copy,
        get_changed_paths=get_changed_paths)
294
def get_last_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True, get_changed_paths=True):
    """
    Get the last log entry before (or at) 'rev_end' in an SVN branch,
    looking no further back than 'rev_start'.
    """
    # Descending range (end -> start): the single entry returned is the newest one.
    return get_one_svn_log_entry(
        svn_url, rev_end, rev_start,
        stop_on_copy=stop_on_copy,
        get_changed_paths=get_changed_paths)
302
303
# Tuning knobs for the adaptive chunking in iter_svn_log_entries():
# target request duration (seconds) and bounds on the number of revisions
# requested per "svn log" call.
log_duration_threshold = 10.0
log_min_chunk_length = 10
log_max_chunk_length = 10000

def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_changed_paths=True, get_revprops=False, ancestors=None):
    """
    Iterate over SVN log entries between first_rev and last_rev.

    This function features chunked log fetching so that it isn't too nasty
    to the SVN server if many entries are requested.

    first_rev -- first revision to report (converted with int())
    last_rev  -- last revision to report; "HEAD" is resolved through info()
    ancestors -- optional pre-calculated ancestry list; each item is read for
                 its 'path', 'revision', 'copyfrom_path' and 'copyfrom_rev'
                 keys. Defaults to no ancestry.

    Yields the dicts produced by run_svn_log(), each with an extra 'url' key
    holding the URL that was actually queried for that entry.

    NOTE: If *not* passing in the explicit (pre-calculated) 'ancestors' list,
          this chunked log fetching *ONLY* works correctly on paths which
          are known to have existed unbroken in the SVN repository, e.g. /trunk.
          Chunked fetching breaks down if a path existed earlier, then was
          deleted, and later was re-created. For example, if path was created in r5,
          then deleted in r1000, and then later re-created in r5000...
            svn log --stop-on-copy --limit 1 -r 1:50 "path/to/file"
              --> would yield r5, i.e. the _initial_ creation
            svn log --stop-on-copy --limit 1 -r 1:HEAD "path/to/file"
              --> would yield r5000, i.e. the _re-creation_
          Use run/svn2svn.py:find_svn_ancestors() to pass in the 'ancestors' array
          so that we can correctly re-trace ancestry here.
    """
    svn_info = info(svn_url)
    svn_repos_url = svn_info['repos_url']
    if last_rev == "HEAD":
        last_rev = svn_info['revision']
    # Normalize once: first_rev is compared against and added to integers
    # below, so a numeric string must not leak through.
    first_rev = int(first_rev)
    if first_rev == 1:
        # Skip ahead to the first revision which actually touches this URL.
        start_log = get_first_svn_log_entry(svn_url, first_rev, last_rev, stop_on_copy=stop_on_copy, get_changed_paths=False)
        if start_log['revision'] > first_rev:
            first_rev = start_log['revision']
    cur_url = svn_url
    cur_rev = first_rev
    cur_anc_idx = None
    cur_anc_end_rev = None
    if ancestors:
        # Crawl ancestry, from oldest to newest, to find the segment which
        # contains first_rev.
        for idx in range(len(ancestors)-1, -1, -1):   # [n-1,...,0]
            cur_url = svn_repos_url+ancestors[idx]['copyfrom_path']
            cur_anc_idx = idx
            if first_rev < int(ancestors[idx]['copyfrom_rev']):
                cur_anc_end_rev = int(ancestors[idx]['copyfrom_rev'])
                break
        if cur_anc_end_rev is None:
            # first_rev is past every copy-from: start on the final path.
            cur_anc_idx = -1
            cur_url = svn_repos_url+ancestors[0]['path']
    chunk_length = log_min_chunk_length
    while cur_rev <= last_rev:
        if cur_anc_end_rev and cur_rev >= cur_anc_end_rev:
            # Reached the end of the current ancestor segment: jump to the
            # revision where the copy happened and move on to the next
            # (newer) segment.
            cur_rev = int(ancestors[cur_anc_idx]['revision'])
            cur_anc_idx -= 1
            if cur_anc_idx >= 0:
                idx = cur_anc_idx
                cur_url = svn_repos_url+ancestors[idx]['copyfrom_path']
                cur_anc_end_rev = int(ancestors[idx]['copyfrom_rev'])
            else:
                # No more ancestors: continue on the final path.
                cur_url = svn_repos_url+ancestors[0]['path']
                cur_anc_end_rev = None
        start_t = time.time()
        stop_rev = min(last_rev, cur_rev + chunk_length)
        # Never request past the end of the current ancestor segment.
        stop_rev = min(stop_rev, cur_anc_end_rev) if cur_anc_end_rev else stop_rev
        entries = run_svn_log(cur_url, cur_rev, stop_rev, chunk_length,
                              stop_on_copy, get_changed_paths, get_revprops)
        duration = time.time() - start_t
        if entries:
            for e in entries:
                if e['revision'] > last_rev:
                    break
                # Embed the current URL in the yielded dict, for ancestor cases where
                # we might have followed a copy-from to some non-original URL.
                e['url'] = cur_url
                yield e
                if e['revision'] >= last_rev:
                    break
            # Resume right after the last entry looked at.
            cur_rev = int(e['revision'])+1
        else:
            cur_rev = int(stop_rev)+1
        # Adapt chunk length based on measured request duration
        if duration < log_duration_threshold:
            chunk_length = min(log_max_chunk_length, int(chunk_length * 2.0))
        elif duration > log_duration_threshold * 2:
            chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
396
397
# Cached result of version(); None until the first call.
_svn_client_version = None

def version():
    """
    Returns the SVN client version as a tuple.

    The version string reported by "svn --version -q" is split on "." and
    only the all-digit components are kept (as ints); any other component
    is silently ignored. The result is computed once and then cached.
    """
    global _svn_client_version
    if _svn_client_version is not None:
        return _svn_client_version
    raw = run_svn(['--version', '-q']).strip()
    _svn_client_version = tuple(int(part) for part in raw.split('.')
                                if part.isdigit())
    return _svn_client_version
413
414
def _parse_svn_propget_xml(xml_string):
    """
    Parse the XML output from an "svn propget" command and extract useful
    information as a dict.

    Returns {'name': ..., 'value': ...} for the first <property> element,
    with the value's line endings normalized to "\\n". When the output
    contains no <property> element at all, 'name' is None and 'value' is "".
    """
    xml_string = _strip_forbidden_xml_chars(xml_string)
    tree = ET.fromstring(xml_string)
    prop = tree.find('.//property')
    if prop is None:
        # Nothing was reported; don't dereference the missing node.
        return {'name': None, 'value': ""}
    value = prop.text
    if value:
        value = value.replace('\r\n', '\n').replace('\n\r', '\n').replace('\r', '\n')
    return {'name': prop.get('name'), 'value': value or ""}
427
def _parse_svn_proplist_xml(xml_string):
    """
    Parse the XML output from an "svn proplist" command and extract list
    of property-names (the 'name' attribute of every <property> element).
    """
    root = ET.fromstring(_strip_forbidden_xml_chars(xml_string))
    return [prop_node.get('name') for prop_node in root.findall('.//property')]
439
def propget(svn_url_or_wc, prop_name, rev_number=None):
    """
    Get the value of a versioned property for the given path.

    Returns the dict built by _parse_svn_propget_xml().
    """
    svn_args = ['propget', '--xml']
    if rev_number:
        svn_args.extend(['-r', rev_number])
    svn_args.append(prop_name)
    svn_args.append(safe_path(svn_url_or_wc, rev_number))
    return _parse_svn_propget_xml(run_svn(svn_args))
450
def propget_all(svn_url_or_wc, rev_number=None):
    """
    Get the values of all versioned properties for the given path.

    Lists the property names with "svn proplist", then fetches each one
    through propget(). Returns a dict mapping property name to value.
    """
    svn_args = ['proplist', '--xml']
    if rev_number:
        svn_args.extend(['-r', rev_number])
    svn_args.append(safe_path(svn_url_or_wc, rev_number))
    prop_names = _parse_svn_proplist_xml(run_svn(svn_args))
    values = {}
    for prop_name in prop_names:
        fetched = propget(svn_url_or_wc, prop_name, rev_number)
        values[fetched['name']] = fetched['value']
    return values
466
def update(path, non_recursive=False):
    """
    Update a path in a working-copy (externals are ignored).
    Pass non_recursive=True to add "-N".
    """
    depth_args = ['-N'] if non_recursive else []
    run_svn(['update', '--ignore-externals'] + depth_args + [safe_path(path)])
476
def remove(path, force=False):
    """
    Remove a file/directory in a working-copy.
    Pass force=True to add "--force".
    """
    force_args = ['--force'] if force else []
    run_svn(['remove'] + force_args + [safe_path(path)])
486
def export(svn_url, rev_number, path, non_recursive=False, force=False):
    """
    Export a file from a repo to a local path.

    'rev_number' is used as both the operative (-r) and the peg revision of
    'svn_url'. Externals are ignored; 'non_recursive' adds "-N" and 'force'
    adds "--force".
    """
    svn_args = ['export', '--ignore-externals', '-r', rev_number]
    if non_recursive:
        svn_args.append('-N')
    if force:
        svn_args.append('--force')
    svn_args.append(safe_path(svn_url, rev_number))
    svn_args.append(safe_path(path))
    run_svn(svn_args)
498
def _parse_svn_list_xml(xml_string):
    """
    Parse the XML output from an "svn list" command and extract list
    of contents, as dicts with a 'path' (text of the entry's <name>) and
    a 'kind' (the entry's 'kind' attribute).
    """
    root = ET.fromstring(_strip_forbidden_xml_chars(xml_string))
    return [{ 'path': node.find('.//name').text,
              'kind': node.get('kind') }
            for node in root.findall('.//entry')]
513
def list(svn_url_or_wc, rev_number=None, recursive=False):
    """
    List the contents of a path as they exist in the repo.

    Returns the list of dicts built by _parse_svn_list_xml(), or an empty
    list when "svn list" did not produce a complete XML document.
    """
    svn_args = ['list', '--xml']
    if rev_number:
        svn_args.extend(['-r', rev_number])
    if recursive:
        svn_args.append('-R')
    svn_args.append(safe_path(svn_url_or_wc, rev_number))
    xml_string = run_svn(svn_args, no_fail=True)
    # If svn_url_or_wc is a WC path which hasn't been committed yet,
    # 'svn list' won't return a valid XML document. Gracefully short-circuit.
    if "</lists>" in xml_string:
        return _parse_svn_list_xml(xml_string)
    return []