]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/svnclient.py
Fix another --keep-revnum edge-case
[svn2svn.git] / svn2svn / svnclient.py
1 """ SVN client functions """
2
3 from shell import run_svn
4 from errors import EmptySVNLog
5
6 import os
7 import time
8 import calendar
9 import operator
10
11 try:
12 from xml.etree import cElementTree as ET
13 except ImportError:
14 try:
15 from xml.etree import ElementTree as ET
16 except ImportError:
17 try:
18 import cElementTree as ET
19 except ImportError:
20 from elementtree import ElementTree as ET
21
# Legacy helpers for the two-argument byte-string form of translate();
# kept so that anything still referring to them keeps working.
_identity_table = "".join(map(chr, range(256)))
_forbidden_xml_chars = "".join(
    sorted(set(map(chr, range(32))) - set('\x09\x0A\x0D'))
)
# The same deletion set in the two forms translate() needs:
# a byte string (for byte-string input) and an {ordinal: None} mapping
# (for text input, whose translate() only accepts a single mapping).
_forbidden_xml_bytes = _forbidden_xml_chars.encode('latin-1')
_forbidden_xml_ordinals = dict.fromkeys(map(ord, _forbidden_xml_chars))


def strip_forbidden_xml_chars(xml_string):
    """
    Given an XML string, strips forbidden characters as per the XML spec.
    (these are all control characters except 0x9, 0xA and 0xD).

    Accepts either a byte string or a text (unicode) string and returns a
    string of the same type with the forbidden characters removed.
    """
    if isinstance(xml_string, bytes):
        # Byte strings: a table of None means "identity", only delete.
        return xml_string.translate(None, _forbidden_xml_bytes)
    # Text strings: ordinals mapped to None are dropped.
    return xml_string.translate(_forbidden_xml_ordinals)
34
35
def svn_date_to_timestamp(svn_date):
    """
    Parse an SVN date as read from the XML output and return the corresponding
    timestamp.

    svn_date is expected to look like "YYYY-MM-DDTHH:MM:SS.ffffffZ"; the
    fractional seconds are optional. The value is interpreted as UTC and the
    result is an integer number of seconds since the epoch.
    Raises ValueError (from time.strptime) if the date part is malformed.
    """
    # Strip microseconds and timezone (always UTC, hopefully)
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    date = svn_date.strip().split('.', 1)[0]
    # Without a fractional part the "Z" suffix survives the split above.
    date = date.rstrip('Z')
    time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(time_tuple)
47
def parse_svn_info_xml(xml_string):
    """
    Turn the XML printed by an "svn info" command into a flat dict.

    Keys produced: 'url', 'kind', 'revision', 'repos_url', 'repos_uuid',
    'last_changed_rev', 'last_changed_date' and, only when the commit has an
    <author> element, 'last_changed_author'.
    """
    root = ET.fromstring(strip_forbidden_xml_chars(xml_string))
    entry = root.find('.//entry')
    info = {
        'url': entry.find('url').text,
        'kind': entry.get('kind'),
        'revision': int(entry.get('revision')),
        'repos_url': root.find('.//repository/root').text,
        'repos_uuid': root.find('.//repository/uuid').text,
        'last_changed_rev': int(root.find('.//commit').get('revision')),
    }
    # The author element is optional; the key is simply left out without it.
    author_node = root.find('.//commit/author')
    if author_node is not None:
        info['last_changed_author'] = author_node.text
    commit_date = root.find('.//commit/date').text
    info['last_changed_date'] = svn_date_to_timestamp(commit_date)
    return info
68
def get_kind(svn_repos_url, svn_path, svn_rev, action, paths):
    """
    Calculate the "kind"-type of a given URL in the SVN repo.

    svn_repos_url -- URL prefix to which the in-repository path is appended.
    svn_path      -- path whose kind is wanted.
    svn_rev       -- revision of the log entry being inspected (a number;
                     it is decremented for plain deletions).
    action        -- action code of the path in that log entry; only 'D'
                     gets special treatment.
    paths         -- changed-path dicts of the same log entry, each with
                     'path', 'kind', 'copyfrom_path' and
                     'copyfrom_revision' keys.

    Returns the 'kind' value reported by get_svn_info().
    """
    # By default, just do a simple "svn info" based on passed-in params.
    info_path = svn_path
    info_rev = svn_rev
    if action == 'D':
        # For deletions, we can't do an "svn info" at this revision.
        # Need to trace ancestry backwards.
        # Collect any copy-from's in this log entry that we're a child of.
        parents = [p for p in paths
                   if p['kind'] == 'dir' and p['copyfrom_revision']
                   and svn_path.startswith(p['path'] + "/")]
        if parents:
            # Use the nearest copy-from'd parent. Every candidate is a
            # prefix of svn_path, so the nearest one has the longest path.
            parent = max(parents, key=lambda p: len(p['path']))
            # Swap only the leading parent prefix for its copy-from source;
            # a plain str.replace() would also rewrite any later occurrence
            # of the same text inside the path.
            info_path = parent['copyfrom_path'] + svn_path[len(parent['path']):]
            info_rev = parent['copyfrom_revision']
        else:
            # If no parent copy-from's, then we should be able to check this
            # path in the preceding revision.
            info_rev -= 1
    info = get_svn_info(svn_repos_url + info_path, info_rev)
    return info['kind']
98
def parse_svn_log_xml(xml_string):
    """
    Turn the XML printed by an "svn log" command into a list of dicts, one
    per <logentry>, in document order.

    Each dict has the keys 'revision', 'author', 'date_raw', 'date',
    'message', 'changed_paths' and 'revprops'.
    """
    entries = []
    root = ET.fromstring(strip_forbidden_xml_chars(xml_string))
    for node in root.findall('logentry'):
        record = {'revision': int(node.get('revision'))}
        # Some revisions don't have authors, most notably the first revision
        # in a repository. Entries for directories protected by path-based
        # authentication have no child nodes at all; they still produce a
        # record, just one without any changed paths.
        author_node = node.find('author')
        date_node = node.find('date')
        msg_node = node.find('msg')
        if author_node is not None and author_node.text:
            record['author'] = author_node.text
        else:
            record['author'] = "No author"
        if date_node is None:
            record['date_raw'] = None
            record['date'] = None
        else:
            record['date_raw'] = date_node.text
            record['date'] = svn_date_to_timestamp(date_node.text)
        if msg_node is not None and msg_node.text:
            # Normalise every flavour of line ending to "\n".
            record['message'] = msg_node.text.replace('\r\n', '\n').replace('\n\r', '\n').replace('\r', '\n')
        else:
            record['message'] = ""
        changed = []
        for path_node in node.findall('.//paths/path'):
            rev_attr = path_node.get('copyfrom-rev')
            changed.append({
                'path': path_node.text,
                'kind': path_node.get('kind'),
                'action': path_node.get('action'),
                'copyfrom_path': path_node.get('copyfrom-path'),
                'copyfrom_revision': int(rev_attr) if rev_attr else rev_attr,
            })
        # Order paths hierarchically so that consumers can walk them
        # parent-before-child.
        changed.sort(key=operator.itemgetter('path'))
        record['changed_paths'] = changed
        record['revprops'] = [
            {'name': prop_node.get('name'), 'value': prop_node.text}
            for prop_node in node.findall('.//revprops/property')
        ]
        entries.append(record)
    return entries
144
def parse_svn_status_xml(xml_string, base_dir=None, ignore_externals=False):
    """
    Turn the XML printed by an "svn status" command into a list of dicts,
    one per status <entry>.

    When base_dir is given, every entry path must start with it and is
    returned relative to it. With ignore_externals set, entries whose
    wc-status item is 'external' are left out.
    """
    if base_dir:
        base_dir = os.path.normcase(base_dir)
    results = []
    root = ET.fromstring(strip_forbidden_xml_chars(xml_string))
    for node in root.findall('.//entry'):
        rel_path = node.get('path')
        if base_dir is not None:
            assert os.path.normcase(rel_path).startswith(base_dir)
            # Drop the base prefix plus any separator left in front.
            rel_path = rel_path[len(base_dir):].lstrip('/\\')
        wc_status = node.find('wc-status')
        item = wc_status.get('item')
        if ignore_externals and item == 'external':
            continue
        revision = wc_status.get('revision')
        if item == 'external':
            entry_type = 'external'
        elif revision is None:
            entry_type = 'unversioned'
        else:
            entry_type = 'normal'
        results.append({
            'path': rel_path,
            'type': entry_type,
            'status': item,
            'revision': revision,
            'props': wc_status.get('props'),
            'copied': wc_status.get('copied'),
        })
    return results
180
def get_svn_rev(svn_url_or_wc, rev_number):
    """
    Resolve an SVN revision pattern to a discrete revision number by asking
    "svn info" about the target at that revision.
    """
    command = ['info', '--xml', '-r', rev_number, svn_url_or_wc]
    parsed = parse_svn_info_xml(run_svn(command, fail_if_stderr=True))
    return parsed['revision']
188
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Run "svn info" on the given URL or working copy and return the dict
    built by parse_svn_info_xml().

    When rev_number is given it is used both as the operative revision
    ("-r") and as the peg revision ("@rev") of the target.
    """
    if rev_number is None:
        target_args = [svn_url_or_wc]
    else:
        target_args = ["-r", rev_number, svn_url_or_wc+"@"+str(rev_number)]
    xml_string = run_svn(['info', '--xml'] + target_args, fail_if_stderr=True)
    return parse_svn_info_xml(xml_string)
202
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Quietly check out svn_url into checkout_dir, at rev_number when one is
    given. Returns whatever run_svn() returns.
    """
    revision_args = [] if rev_number is None else ['-r', rev_number]
    return run_svn(['checkout', '-q'] + revision_args + [svn_url, checkout_dir])
212
def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Run "svn log" for the revision range rev_start:rev_end, asking for at
    most 'limit' entries, and return them as parsed by parse_svn_log_xml().
    """
    command = ['log', '--xml']
    optional_flags = (
        (stop_on_copy, '--stop-on-copy'),
        (get_changed_paths, '-v'),
        (get_revprops, '--with-all-revprops'),
    )
    for enabled, flag in optional_flags:
        if enabled:
            command.append(flag)
    command.extend(['-r', '%s:%s' % (rev_start, rev_end)])
    if "@" in svn_url_or_wc:
        # The caller already pinned a peg revision; use the target as-is.
        target = str(svn_url_or_wc)
    else:
        # Peg the target at the larger of the two range ends.
        target = "%s@%s" % (svn_url_or_wc, str(max(rev_start, rev_end)))
    command.extend(['--limit', str(limit), target])
    return parse_svn_log_xml(run_svn(command))
231
def get_svn_status(svn_wc, quiet=False, no_recursive=False):
    """
    Run "svn status" on the given working copy and return the entries as
    parsed by parse_svn_status_xml(), with paths relative to the working
    copy and externals left out.
    """
    # Canonicalize the path so the prefix can be stripped reliably later.
    wc_root = os.path.abspath(svn_wc)
    command = ['status', '--xml', '--ignore-externals', '-q' if quiet else '-v']
    if no_recursive:
        command.append('-N')
    command.append(wc_root)
    return parse_svn_status_xml(run_svn(command), wc_root, ignore_externals=True)
247
def get_svn_versioned_files(svn_wc):
    """
    List the paths of all status entries of type 'normal' in the SVN working
    copy, skipping entries with an empty path.
    """
    return [entry['path'] for entry in get_svn_status(svn_wc)
            if entry['path'] and entry['type'] == 'normal']
257
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Return the first SVN log entry found in the requested revision range.
    Raises EmptySVNLog when the range yields no entry at all.
    """
    found = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths, get_revprops)
    if not found:
        raise EmptySVNLog("No SVN log for %s between revisions %s and %s" %
                          (svn_url, rev_start, rev_end))
    return found[0]
267
def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the first log entry after (or at) rev_start in an SVN branch, looking
    no further than rev_end and not crossing copies (--stop-on-copy).
    Searching from the beginning of history gives the log entry
    corresponding to the branch creation.

    get_changed_paths controls whether the entry's 'changed_paths' list is
    fetched; it is forwarded to get_one_svn_log_entry().
    Raises EmptySVNLog if the range contains no log entry.

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary (requires get_changed_paths=True).
    """
    return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
279
def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the last log entry before/at rev_end in an SVN branch, looking no
    further back than rev_start and not crossing copies (--stop-on-copy).

    The range is handed to get_one_svn_log_entry() reversed (rev_end first),
    so the entry it returns first is the one nearest rev_end.

    get_changed_paths controls whether the entry's 'changed_paths' list is
    fetched; it is forwarded to get_one_svn_log_entry().
    Raises EmptySVNLog if the range contains no log entry.
    """
    return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
287
288
# Tuning knobs for the chunked log fetching in iter_svn_log_entries().
# Request duration (seconds) below which the chunk length is doubled;
# above twice this value the chunk length is halved.
log_duration_threshold = 10.0
# Initial chunk length, and the lower bound when shrinking.
log_min_chunk_length = 10
# Upper bound when growing the chunk length.
log_max_chunk_length = 10000
292
def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Iterate over SVN log entries between first_rev and last_rev.

    This is a generator yielding the dicts produced by run_svn_log(), in
    the order they are returned. A last_rev of "HEAD" is first resolved to a
    revision number via get_svn_info(). The stop_on_copy, get_changed_paths
    and get_revprops flags are passed through to run_svn_log().

    This function features chunked log fetching so that it isn't too nasty
    to the SVN server if many entries are requested.

    NOTE: This chunked log fetching *ONLY* works correctly on paths which
    are known to have existed unbroken in the SVN repository, e.g. /trunk.
    Chunked fetching breaks down if a path existed earlier, then was
    deleted, and later was re-created. For example, if path was created in r5,
    then deleted in r1000, and then later re-created in r5000...
      svn log --stop-on-copy --limit 1 -r 1:50 "path/to/file"
        --> would yield r5, i.e. the _initial_ creation
      svn log --stop-on-copy --limit 1 -r 1:HEAD "path/to/file"
        --> would yield r5000, i.e. the _re-creation_
    In theory this might work if we always search "backwards", searching from
    the end going forward rather than forward going to the end...
    """
    if last_rev == "HEAD":
        info = get_svn_info(svn_url)
        last_rev = info['revision']
    cur_rev = first_rev
    chunk_length = log_min_chunk_length
    while cur_rev <= last_rev:
        # Time each request so the chunk length can be adapted below.
        start_t = time.time()
        stop_rev = min(last_rev, cur_rev + chunk_length)
        entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length,
                              stop_on_copy, get_changed_paths, get_revprops)
        duration = time.time() - start_t
        if entries:
            for e in entries:
                # Never hand out anything beyond the requested range.
                if e['revision'] > last_rev:
                    break
                yield e
            # 'e' is still the last entry looked at by the loop above.
            if e['revision'] >= last_rev:
                break
            # Resume right after the last entry seen; the request may have
            # been cut short by the --limit before reaching stop_rev.
            cur_rev = e['revision']+1
        else:
            # Nothing in this window: skip past it entirely.
            cur_rev = int(stop_rev)+1
        # Adapt chunk length based on measured request duration
        if duration < log_duration_threshold:
            chunk_length = min(log_max_chunk_length, int(chunk_length * 2.0))
        elif duration > log_duration_threshold * 2:
            chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
335 chunk_length = min(log_max_chunk_length, int(chunk_length * 2.0))
336 elif duration > log_duration_threshold * 2:
337 chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
338
339
# Cached result of get_svn_client_version(); None until first queried.
_svn_client_version = None

def get_svn_client_version():
    """
    Return the SVN client version as a tuple of ints.

    The version string is split on dots and only the purely numeric
    components are kept; anything else is silently dropped. The result is
    computed once and then served from a module-level cache.
    """
    global _svn_client_version
    if _svn_client_version is not None:
        return _svn_client_version
    version_text = run_svn(['--version', '-q']).strip()
    _svn_client_version = tuple(
        int(part) for part in version_text.split('.') if part.isdigit()
    )
    return _svn_client_version
355
356
def parse_svn_propget_xml(xml_string):
    """
    Parse the XML output from an "svn propget" command and extract useful
    information as a dict.

    Returns {'name': ..., 'value': ...} for the first <property> element
    found, with all line endings in the value normalised to "\\n". If the
    output contains no <property> element, 'name' is None and 'value' is "".
    """
    d = {'name': None, 'value': ""}
    xml_string = strip_forbidden_xml_chars(xml_string)
    tree = ET.fromstring(xml_string)
    prop = tree.find('.//property')
    if prop is None:
        # Nothing to read attributes from; report an empty property instead
        # of failing on prop.get().
        return d
    d['name'] = prop.get('name')
    if prop.text:
        d['value'] = prop.text.replace('\r\n', '\n').replace('\n\r', '\n').replace('\r', '\n')
    return d
369
def parse_svn_proplist_xml(xml_string):
    """
    Turn the XML printed by an "svn proplist" command into the list of
    property names it mentions, in document order.
    """
    root = ET.fromstring(strip_forbidden_xml_chars(xml_string))
    return [node.get('name') for node in root.findall('.//property')]
381
def get_prop_value(svn_url_or_wc, prop_name, rev_number=None):
    """
    Fetch one versioned property of the given path via "svn propget" and
    return the dict built by parse_svn_propget_xml().
    """
    command = ['propget', '--xml']
    target = str(svn_url_or_wc)
    if rev_number:
        command.extend(['-r', rev_number])
        # Also peg the target at that revision unless it is pegged already.
        if "@" not in svn_url_or_wc:
            target = "%s@%s" % (svn_url_or_wc, str(rev_number))
    command.extend([prop_name, target])
    return parse_svn_propget_xml(run_svn(command))
395
def get_all_props(svn_url_or_wc, rev_number=None):
    """
    Fetch every versioned property of the given path and return them as a
    dict mapping property name to value.

    The names come from "svn proplist"; each value is then looked up
    individually with get_prop_value().
    """
    command = ['proplist', '--xml']
    target = str(svn_url_or_wc)
    if rev_number:
        command.extend(['-r', rev_number])
        # Also peg the target at that revision unless it is pegged already.
        if "@" not in svn_url_or_wc:
            target = "%s@%s" % (svn_url_or_wc, str(rev_number))
    command.append(target)
    prop_names = parse_svn_proplist_xml(run_svn(command))
    values = {}
    for prop_name in prop_names:
        prop = get_prop_value(svn_url_or_wc, prop_name, rev_number)
        values[prop['name']] = prop['value']
    return values