Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/svnclient.py
Fix keep_revnum vs source_start_rev handling
[svn2svn.git] / svn2svn / svnclient.py
1 """ SVN client functions """
2
3 from shell import run_svn
4 from errors import EmptySVNLog
5
6 import os
7 import time
8 import calendar
9 import operator
10 import urllib
11
12 try:
13 from xml.etree import cElementTree as ET
14 except ImportError:
15 try:
16 from xml.etree import ElementTree as ET
17 except ImportError:
18 try:
19 import cElementTree as ET
20 except ImportError:
21 from elementtree import ElementTree as ET
22
23 _identity_table = "".join(map(chr, range(256)))
24 _forbidden_xml_chars = "".join(
25 set(map(chr, range(32))) - set('\x09\x0A\x0D')
26 )
27
28 valid_svn_actions = "MARD" # The list of known SVN action abbr's, from "svn log"
29
def _strip_forbidden_xml_chars(xml_string):
    """
    Return 'xml_string' with the characters forbidden by the XML spec removed
    (every control character other than 0x9, 0xA and 0xD).
    """
    # The identity table leaves all characters as they are; the second
    # argument lists the characters to delete.
    cleaned = xml_string.translate(_identity_table, _forbidden_xml_chars)
    return cleaned
36
def safe_path(path, rev_number=None):
    """
    Return 'path' in a form suitable for passing as an SVN command-line arg.

    URLs (anything containing "://") are URL-escaped, while local
    working-copy paths are left alone. When 'rev_number' is given, it is
    appended as a peg revision ("path@REV"). Otherwise, a path which already
    contains an "@" gets a trailing "@", to "escape" the earlier "@".
    """
    is_url = "://" in path
    if is_url:
        path = urllib.quote(path, ":/+")
    if rev_number is None:
        # No peg revision requested: only neutralize an embedded "@", if any.
        return (path + "@") if "@" in path else path
    return path + "@" + str(rev_number)
51
52 def _svn_date_to_timestamp(svn_date):
53 """
54 Parse an SVN date as read from the XML output and return the corresponding
55 timestamp.
56 """
57 # Strip microseconds and timezone (always UTC, hopefully)
58 # XXX there are various ISO datetime parsing routines out there,
59 # cf. http://seehuhn.de/comp/pdate
60 date = svn_date.split('.', 2)[0]
61 time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
62 return calendar.timegm(time_tuple)
63
def _parse_svn_info_xml(xml_string):
    """
    Parse the XML output of an "svn info" command into a dict.

    The returned dict holds the keys 'url', 'kind', 'revision', 'repos_url',
    'repos_uuid', 'last_changed_rev' and 'last_changed_date', plus
    'last_changed_author' when the commit has an author element.
    """
    root = ET.fromstring(_strip_forbidden_xml_chars(xml_string))
    entry = root.find('.//entry')
    result = {
        'url': entry.find('url').text,
        'kind': entry.get('kind'),
        'revision': int(entry.get('revision')),
        'repos_url': root.find('.//repository/root').text,
        'repos_uuid': root.find('.//repository/uuid').text,
        'last_changed_rev': int(root.find('.//commit').get('revision')),
    }
    # Not every commit carries an author, so this key is optional.
    author_node = root.find('.//commit/author')
    if author_node is not None:
        result['last_changed_author'] = author_node.text
    commit_date = root.find('.//commit/date').text
    result['last_changed_date'] = _svn_date_to_timestamp(commit_date)
    # URL-decode the "url" and "repos_url" values: all paths passed to
    # run_svn() should be filtered through safe_path(), and we don't want to
    # *double* URL-encode paths which are constructed using these values.
    for key in ('url', 'repos_url'):
        result[key] = urllib.unquote(result[key])
    return result
89
def get_kind(svn_repos_url, svn_path, svn_rev, action, paths):
    """
    Calculate the "kind"-type of a given URL in the SVN repo.

    svn_repos_url -- repository root URL; the path to inspect is appended to it.
    svn_path      -- repository path whose kind is wanted.
    svn_rev       -- revision in which 'action' was applied to svn_path.
    action        -- SVN action abbreviation; only 'D' (delete) is special-cased.
    paths         -- changed-path dicts of the same log entry; the keys 'path',
                     'kind', 'copyfrom_path' and 'copyfrom_revision' are read.

    Returns the 'kind' value of the dict returned by info().
    """
    # By default, just do a simple "svn info" based on passed-in params.
    info_path = svn_path
    info_rev = svn_rev
    if action == 'D':
        # For deletions, we can't do an "svn info" at this revision.
        # Need to trace ancestry backwards: collect any copy-from'd
        # directories in this log entry that svn_path is a child of.
        copied_parents = [p for p in paths
                          if p['kind'] == 'dir'
                          and p['copyfrom_revision']
                          and svn_path.startswith(p['path'] + "/")]
        if copied_parents:
            # Use the nearest copy-from'd parent. Every candidate is a prefix
            # of svn_path, so the greatest path is also the longest one.
            nearest = max(copied_parents, key=lambda p: p['path'])
            # Swap only the leading parent prefix for its copy-from source.
            # A plain str.replace() would also rewrite later occurrences of
            # the parent path inside svn_path.
            info_path = nearest['copyfrom_path'] + svn_path[len(nearest['path']):]
            info_rev = nearest['copyfrom_revision']
        else:
            # If no parent copy-from's, then we should be able to check this
            # path in the preceding revision.
            info_rev -= 1
    svn_info = info(svn_repos_url + info_path, info_rev)
    return svn_info['kind']
119
def _parse_svn_log_xml(xml_string):
    """
    Parse the XML output of an "svn log" command into a list of dicts,
    one per log changeset.

    Each dict holds 'revision', 'author', 'date_raw', 'date', 'message',
    'changed_paths' (sorted by path) and 'revprops'.
    """
    log_entries = []
    root = ET.fromstring(_strip_forbidden_xml_chars(xml_string))
    for logentry in root.findall('logentry'):
        revision = int(logentry.get('revision'))
        # Some revisions don't have authors, most notably the first revision
        # in a repository.
        # logentry nodes targeting directories protected by path-based
        # authentication have no child nodes at all. We return an entry
        # in that case. Anyway, as it has no path entries, no further
        # processing will be made.
        author_node = logentry.find('author')
        date_node = logentry.find('date')
        msg_node = logentry.find('msg')

        author = None
        if author_node is not None:
            author = author_node.text
        if date_node is not None:
            date_raw = date_node.text
            date = _svn_date_to_timestamp(date_node.text)
        else:
            date_raw = None
            date = None
        message = None
        if msg_node is not None:
            message = msg_node.text
        if message:
            # Normalize all line-ending flavours to "\n".
            message = message.replace('\r\n', '\n').replace('\n\r', '\n').replace('\r', '\n')

        changed_paths = []
        for path_node in logentry.findall('.//paths/path'):
            copyfrom_rev = path_node.get('copyfrom-rev')
            changed_paths.append({
                'path': path_node.text,
                'kind': path_node.get('kind'),
                'action': path_node.get('action'),
                'copyfrom_path': path_node.get('copyfrom-path'),
                'copyfrom_revision': int(copyfrom_rev) if copyfrom_rev else copyfrom_rev,
            })
        # Sort paths (i.e. into hierarchical order), so that
        # process_svn_log_entry() can process actions in depth-first order.
        changed_paths.sort(key=operator.itemgetter('path'))

        revprops = [{'name': prop_node.get('name'), 'value': prop_node.text}
                    for prop_node in logentry.findall('.//revprops/property')]

        log_entries.append({
            'revision': revision,
            'author': author or "No author",
            'date_raw': date_raw,
            'date': date,
            'message': message or "",
            'changed_paths': changed_paths,
            'revprops': revprops,
        })
    return log_entries
165
def _parse_svn_status_xml(xml_string, base_dir=None, ignore_externals=False):
    """
    Parse the XML output of an "svn status" command into a list of dicts,
    one per status entry.

    When 'base_dir' is given, that leading directory is stripped from the
    reported paths. When 'ignore_externals' is true, entries whose status
    item is 'external' are left out of the result.
    """
    if base_dir:
        base_dir = os.path.normcase(base_dir)
    results = []
    root = ET.fromstring(_strip_forbidden_xml_chars(xml_string))
    for entry in root.findall('.//entry'):
        path = entry.get('path')
        if base_dir is not None and os.path.normcase(path).startswith(base_dir):
            # Make the path relative to base_dir.
            path = path[len(base_dir):].lstrip('/\\')
        wc_status = entry.find('wc-status')
        status = wc_status.get('item')
        if status == 'external' and ignore_externals:
            continue
        revision = wc_status.get('revision')
        if status == 'external':
            entry_type = 'external'
        elif revision is None:
            entry_type = 'unversioned'
        else:
            entry_type = 'normal'
        results.append({
            'path': path,
            'type': entry_type,
            'status': status,
            'revision': revision,
            'props': wc_status.get('props'),
            'copied': wc_status.get('copied'),
        })
    return results
200
def get_rev(svn_url_or_wc, rev_number):
    """
    Evaluate a given SVN revision pattern, mapping it to a discrete rev #.

    Runs "svn info" at 'rev_number' (also used as the peg revision) and
    returns the 'revision' value parsed from its XML output.
    """
    args = ['info', '--xml', '-r', rev_number]
    args.append(safe_path(svn_url_or_wc, rev_number))
    svn_info = _parse_svn_info_xml(run_svn(args, fail_if_stderr=True))
    return svn_info['revision']
208
def info(svn_url_or_wc, rev_number=None):
    """
    Run "svn info" on the given URL or working copy, optionally at a specific
    revision number (used both for "-r" and as the peg revision).
    Returns a dict as created by _parse_svn_info_xml().
    """
    target = safe_path(svn_url_or_wc, rev_number)
    if rev_number is None:
        args = ['info', '--xml', target]
    else:
        args = ['info', '--xml', '-r', rev_number, target]
    return _parse_svn_info_xml(run_svn(args, fail_if_stderr=True))
221
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Quietly check out 'svn_url' into 'checkout_dir', optionally at the given
    revision number. Returns whatever run_svn() returns.
    """
    source = safe_path(svn_url, rev_number)
    if rev_number is None:
        args = ['checkout', '-q', source, checkout_dir]
    else:
        args = ['checkout', '-q', '-r', rev_number, source, checkout_dir]
    return run_svn(args)
231
def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Fetch up to 'limit' SVN log entries between the given revisions.

    Returns the list of dicts built by _parse_svn_log_xml().
    """
    args = ['log', '--xml']
    # Each optional switch is added only when its option is enabled.
    optional_switches = (
        (stop_on_copy, '--stop-on-copy'),
        (get_changed_paths, '-v'),
        (get_revprops, '--with-all-revprops'),
    )
    for enabled, switch in optional_switches:
        if enabled:
            args.append(switch)
    args.extend(['-r', '%s:%s' % (rev_start, rev_end)])
    # The greater of the two revisions serves as the peg revision.
    peg_rev = max(rev_start, rev_end)
    args.extend(['--limit', str(limit), safe_path(svn_url_or_wc, peg_rev)])
    return _parse_svn_log_xml(run_svn(args))
247
def status(svn_wc, quiet=False, non_recursive=False):
    """
    Get SVN status information about the given working copy, as the list of
    dicts built by _parse_svn_status_xml(). Externals are always ignored.
    """
    # Ensure proper stripping by canonicalizing the path
    svn_wc = os.path.abspath(svn_wc)
    verbosity = '-q' if quiet else '-v'
    args = ['status', '--xml', '--ignore-externals', verbosity]
    if non_recursive:
        args.append('-N')
    args.append(safe_path(svn_wc))
    xml_string = run_svn(args)
    return _parse_svn_status_xml(xml_string, svn_wc, ignore_externals=True)
263
def get_svn_versioned_files(svn_wc):
    """
    Return the paths of the versioned entries in the SVN working copy, i.e.
    the status entries of type 'normal' which have a non-empty path.
    """
    return [entry['path'] for entry in status(svn_wc)
            if entry['path'] and entry['type'] == 'normal']
273
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Get the first SVN log entry in the requested revision range.

    Raises EmptySVNLog when the range yields no log entry at all.
    """
    entries = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths, get_revprops)
    if not entries:
        raise EmptySVNLog("No SVN log for %s between revisions %s and %s" %
                          (svn_url, rev_start, rev_end))
    return entries[0]
283
def get_first_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True, get_changed_paths=True):
    """
    Get the first log entry after (or at) 'rev_start', looking no further
    than 'rev_end', in an SVN branch.

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    first_entry = get_one_svn_log_entry(
        svn_url, rev_start, rev_end,
        stop_on_copy=stop_on_copy,
        get_changed_paths=get_changed_paths)
    return first_entry
295
def get_last_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True, get_changed_paths=True):
    """
    Get the last log entry before (or at) 'rev_end', looking no further back
    than 'rev_start', in an SVN branch.

    The range is queried in reverse ('rev_end' first), so that the single
    entry fetched is the latest one in the range.
    """
    last_entry = get_one_svn_log_entry(
        svn_url, rev_end, rev_start,
        stop_on_copy=stop_on_copy,
        get_changed_paths=get_changed_paths)
    return last_entry
303
304
# Tuning knobs for the chunked log fetching in iter_svn_log_entries().
# A request faster than this many seconds doubles the chunk length; one that
# takes more than twice as long halves it.
log_duration_threshold = 10.0
# Lower and upper bounds for the number of log entries requested per chunk.
log_min_chunk_length = 10
log_max_chunk_length = 10000
308
def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_changed_paths=True, get_revprops=False, ancestors=None):
    """
    Iterate over SVN log entries between first_rev and last_rev.

    This function features chunked log fetching so that it isn't too nasty
    to the SVN server if many entries are requested.

    svn_url           -- URL to fetch the log for.
    first_rev         -- first revision of the range (int, or numeric string).
    last_rev          -- last revision of the range (int, numeric string, or
                         "HEAD", which is resolved through info(svn_url)).
    stop_on_copy, get_changed_paths, get_revprops
                      -- passed through to run_svn_log().
    ancestors         -- optional pre-calculated ancestry list; the keys
                         'path', 'revision', 'copyfrom_path' and
                         'copyfrom_rev' of its entries are read.

    Yields the dicts built by run_svn_log(), each with an extra 'url' key
    holding the URL that was queried to obtain that entry.

    Raises ValueError (from int()) if a revision is neither numeric nor, for
    last_rev, "HEAD".

    NOTE: If *not* passing in the explicit (pre-calculated) 'ancestors' list,
          this chunked log fetching *ONLY* works correctly on paths which
          are known to have existed unbroken in the SVN repository, e.g. /trunk.
          Chunked fetching breaks down if a path existed earlier, then was
          deleted, and later was re-created. For example, if path was created in r5,
          then deleted in r1000, and then later re-created in r5000...
            svn log --stop-on-copy --limit 1 -r 1:50 "path/to/file"
              --> would yield r5, i.e. the _initial_ creation
            svn log --stop-on-copy --limit 1 -r 1:HEAD "path/to/file"
              --> would yield r5000, i.e. the _re-creation_
          Use run/svnreplay.py:find_svn_ancestors() to pass in the 'ancestors' array
          so that we can correctly re-trace ancestry here.
    """
    if ancestors is None:
        ancestors = []
    svn_info = info(svn_url)
    svn_repos_url = svn_info['repos_url']
    if last_rev == "HEAD":
        last_rev = svn_info['revision']
    # Normalize both revisions to int once, up front: the comparisons, min()
    # calls and arithmetic below are only meaningful on numbers.
    first_rev = int(first_rev)
    last_rev = int(last_rev)
    if first_rev == 1:
        # Skip ahead to the first revision which actually has a log entry.
        start_log = get_first_svn_log_entry(svn_url, first_rev, last_rev, stop_on_copy=stop_on_copy, get_changed_paths=False)
        if start_log['revision'] > first_rev:
            first_rev = start_log['revision']
    cur_url = svn_url
    cur_rev = first_rev
    cur_anc_idx = None
    cur_anc_end_rev = None
    if ancestors:
        # Crawl ancestry, from oldest to newest
        for idx in range(len(ancestors)-1, -1, -1):   # [n-1,...,0]
            cur_url = svn_repos_url+ancestors[idx]['copyfrom_path']
            cur_anc_idx = idx
            if first_rev < int(ancestors[idx]['copyfrom_rev']):
                cur_anc_end_rev = int(ancestors[idx]['copyfrom_rev'])
                break
        if cur_anc_end_rev is None:
            # first_rev lies beyond every copy-from: start on the final path.
            cur_anc_idx = -1
            cur_url = svn_repos_url+ancestors[0]['path']
    chunk_length = log_min_chunk_length
    while cur_rev <= last_rev:
        if cur_anc_end_rev and cur_rev >= cur_anc_end_rev:
            # Reached the end of the current ancestor: jump to the revision
            # of the copy, and move on to the next (newer) ancestor.
            cur_rev = int(ancestors[cur_anc_idx]['revision'])
            cur_anc_idx -= 1
            if cur_anc_idx >= 0:
                idx = cur_anc_idx
                cur_url = svn_repos_url+ancestors[idx]['copyfrom_path']
                cur_anc_end_rev = int(ancestors[idx]['copyfrom_rev'])
            else:
                cur_url = svn_repos_url+ancestors[0]['path']
                cur_anc_end_rev = None
        start_t = time.time()
        stop_rev = min(last_rev, cur_rev + chunk_length)
        # Never run a chunk past the end of the current ancestor.
        stop_rev = min(stop_rev, cur_anc_end_rev) if cur_anc_end_rev else stop_rev
        entries = run_svn_log(cur_url, cur_rev, stop_rev, chunk_length,
                              stop_on_copy, get_changed_paths, get_revprops)
        duration = time.time() - start_t
        if entries:
            for e in entries:
                if e['revision'] > last_rev:
                    break
                # Embed the current URL in the yielded dict, for ancestor cases where
                # we might have followed a copy-from to some non-original URL.
                e['url'] = cur_url
                yield e
            if e['revision'] >= last_rev:
                break
            cur_rev = int(e['revision'])+1
        else:
            cur_rev = int(stop_rev)+1
        # Adapt chunk length based on measured request duration
        if duration < log_duration_threshold:
            chunk_length = min(log_max_chunk_length, int(chunk_length * 2.0))
        elif duration > log_duration_threshold * 2:
            chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
397
398
# Cache for version(), filled in on its first call.
_svn_client_version = None


def version():
    """
    Return the SVN client version as a tuple of ints.

    Only the purely numeric dot-separated components of the version string
    are kept; any other component is silently ignored. The result is cached
    at module level, so the SVN client is only queried once.
    """
    global _svn_client_version
    if _svn_client_version is not None:
        return _svn_client_version
    raw = run_svn(['--version', '-q']).strip()
    numeric_parts = [int(part) for part in raw.split('.') if part.isdigit()]
    _svn_client_version = tuple(numeric_parts)
    return _svn_client_version
414
415
def _parse_svn_propget_xml(xml_string):
    """
    Parse the XML output from an "svn propget" command and extract useful
    information as a dict.

    Returns a dict with the keys 'name' and 'value', taken from the first
    property element found. Line endings in the value are normalized to
    "\\n". When the output contains no property element at all, 'name' is
    None and 'value' is the empty string.
    """
    xml_string = _strip_forbidden_xml_chars(xml_string)
    tree = ET.fromstring(xml_string)
    prop = tree.find('.//property')
    if prop is None:
        # No property in the output: report an empty result instead of
        # failing on the attribute lookup.
        return {'name': None, 'value': ""}
    value = prop.text or ""
    value = value.replace('\r\n', '\n').replace('\n\r', '\n').replace('\r', '\n')
    return {'name': prop.get('name'), 'value': value}
428
def _parse_svn_proplist_xml(xml_string):
    """
    Parse the XML output of an "svn proplist" command and return the list of
    property names, in document order.
    """
    root = ET.fromstring(_strip_forbidden_xml_chars(xml_string))
    return [prop_node.get('name') for prop_node in root.findall('.//property')]
440
def propget(svn_url_or_wc, prop_name, rev_number=None):
    """
    Get the value of a versioned property for the given path, as the dict
    built by _parse_svn_propget_xml().
    """
    args = ['propget', '--xml']
    if rev_number:
        args.extend(['-r', rev_number])
    args.append(prop_name)
    args.append(safe_path(svn_url_or_wc, rev_number))
    return _parse_svn_propget_xml(run_svn(args))
451
def propget_all(svn_url_or_wc, rev_number=None):
    """
    Get the values of all versioned properties for the given path, as a dict
    mapping each property name to its value.
    """
    args = ['proplist', '--xml']
    if rev_number:
        args.extend(['-r', rev_number])
    args.append(safe_path(svn_url_or_wc, rev_number))
    prop_names = _parse_svn_proplist_xml(run_svn(args))
    # "svn proplist" only yields the names, so fetch each value in turn.
    values = {}
    for prop_name in prop_names:
        prop = propget(svn_url_or_wc, prop_name, rev_number)
        values[prop['name']] = prop['value']
    return values
467
def update(path, non_recursive=False):
    """
    Run "svn update" (ignoring externals) on a path in a working-copy.
    Pass non_recursive=True to add the -N switch.
    """
    args = ['update', '--ignore-externals']
    if non_recursive:
        args.append('-N')
    args.append(safe_path(path))
    run_svn(args)
477
def remove(path, force=False):
    """
    Run "svn remove" on a file/directory in a working-copy.
    Pass force=True to add the --force switch.
    """
    args = ['remove']
    if force:
        args.append('--force')
    args.append(safe_path(path))
    run_svn(args)
487
def export(svn_url, rev_number, path, non_recursive=False, force=False):
    """
    Run "svn export" (ignoring externals) from 'svn_url' at 'rev_number' to
    the local 'path'. 'non_recursive' adds -N and 'force' adds --force.
    """
    args = ['export', '--ignore-externals', '-r', rev_number]
    for enabled, switch in ((non_recursive, '-N'), (force, '--force')):
        if enabled:
            args.append(switch)
    args.append(safe_path(svn_url, rev_number))
    args.append(safe_path(path))
    run_svn(args)
499
def _parse_svn_list_xml(xml_string):
    """
    Parse the XML output of an "svn list" command into a list of dicts, one
    per entry, each holding the entry's 'path' (its name text) and 'kind'.
    """
    root = ET.fromstring(_strip_forbidden_xml_chars(xml_string))
    return [{'path': entry.find('.//name').text, 'kind': entry.get('kind')}
            for entry in root.findall('.//entry')]
514
def list(svn_url_or_wc, rev_number=None, recursive=False):
    """
    List the contents of a path as they exist in the repo, as the list of
    dicts built by _parse_svn_list_xml().
    """
    args = ['list', '--xml']
    if rev_number:
        args.extend(['-r', rev_number])
    if recursive:
        args.append('-R')
    args.append(safe_path(svn_url_or_wc, rev_number))
    xml_string = run_svn(args, no_fail=True)
    # If svn_url_or_wc is a WC path which hasn't been committed yet,
    # 'svn list' won't return a valid XML document. Gracefully short-circuit.
    if "</lists>" not in xml_string:
        return []
    return _parse_svn_list_xml(xml_string)