]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/svnclient.py
Update process_svn_log_entry() to calculate d['kind'] if missing.
[svn2svn.git] / svn2svn / svnclient.py
1 """ SVN client functions """
2
3 from shell import run_svn
4 from errors import EmptySVNLog
5
6 import os
7 import time
8 import calendar
9 import operator
10
11 try:
12 from xml.etree import cElementTree as ET
13 except ImportError:
14 try:
15 from xml.etree import ElementTree as ET
16 except ImportError:
17 try:
18 import cElementTree as ET
19 except ImportError:
20 from elementtree import ElementTree as ET
21
# 256-entry identity table: lets the two-argument byte-string translate()
# delete characters without remapping any of the remaining ones.
_identity_table = bytes(bytearray(range(256)))
# Control characters below 0x20 that are stripped (TAB, LF and CR are kept).
_forbidden_xml_chars = bytes(bytearray(
    sorted(set(range(32)) - set((0x09, 0x0A, 0x0D)))
))
# Code point -> None mapping: the deletion form understood by the
# one-argument translate() of text (unicode) strings.
_forbidden_xml_ordinals = dict.fromkeys(bytearray(_forbidden_xml_chars))


def strip_forbidden_xml_chars(xml_string):
    """
    Given an XML string, strips forbidden characters as per the XML spec.
    (these are all control characters except 0x9, 0xA and 0xD).

    Accepts both byte strings and text (unicode) strings and returns a
    string of the same type. The two-argument translate(table, deletechars)
    call only exists on byte strings, so text strings are handled with a
    deletion map instead of raising TypeError.
    """
    if isinstance(xml_string, bytes):
        return xml_string.translate(_identity_table, _forbidden_xml_chars)
    return xml_string.translate(_forbidden_xml_ordinals)
34
35
def svn_date_to_timestamp(svn_date):
    """
    Convert an SVN date string (as found in the XML output) into the
    corresponding timestamp.

    Everything from the first '.' onwards is discarded; the remaining
    "YYYY-MM-DDTHH:MM:SS" part is interpreted as UTC.
    """
    # Drop the fractional seconds and the timezone suffix (always UTC, hopefully).
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    whole_seconds = svn_date.partition('.')[0]
    parsed = time.strptime(whole_seconds, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(parsed)
47
def parse_svn_info_xml(xml_string):
    """
    Parse the XML output from an "svn info" command and extract useful information
    as a dict.

    The returned dict contains:
      'url', 'kind', 'revision'  -- taken from the first <entry> element
      'repos_url', 'repos_uuid'  -- taken from <repository>/<root> and <uuid>
      'last_changed_rev'         -- revision attribute of <commit>
      'last_changed_author'      -- only present when <commit> has an <author>
      'last_changed_date'        -- timestamp, or None when <commit> has no
                                    (or an empty) <date>
    """
    d = {}
    xml_string = strip_forbidden_xml_chars(xml_string)
    tree = ET.fromstring(xml_string)
    entry = tree.find('.//entry')
    d['url'] = entry.find('url').text
    d['kind'] = entry.get('kind')
    d['revision'] = int(entry.get('revision'))
    d['repos_url'] = tree.find('.//repository/root').text
    d['repos_uuid'] = tree.find('.//repository/uuid').text
    d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
    author_element = tree.find('.//commit/author')
    if author_element is not None:
        d['last_changed_author'] = author_element.text
    # Like the author, the date may be absent; store None (the same value
    # parse_svn_log_xml() uses for a missing date) instead of crashing.
    date_element = tree.find('.//commit/date')
    if date_element is not None and date_element.text:
        d['last_changed_date'] = svn_date_to_timestamp(date_element.text)
    else:
        d['last_changed_date'] = None
    return d
68
def get_kind(svn_repos_url, svn_path, svn_rev, action, paths):
    """
    Calculate the "kind"-type of a given URL in the SVN repo.

    svn_repos_url -- prefix that the (possibly remapped) path is appended to
    svn_path      -- path whose kind is wanted
    svn_rev       -- revision at which svn_path was changed
    action        -- log action for svn_path; 'D' gets special treatment
    paths         -- changed-path dicts of the same log entry; the keys
                     'path', 'kind', 'copyfrom_path' and 'copyfrom_revision'
                     are read

    Returns the 'kind' value reported by get_svn_info().
    """
    # By default, just do a simple "svn info" based on passed-in params.
    info_path = svn_path
    info_rev = svn_rev
    if action == 'D':
        # For deletions, we can't do an "svn info" at this revision.
        # Need to trace ancestry backwards.
        # Build a list of any copy-from's in this log_entry that we're a child of.
        parents = [p['path'] for p in paths
                   if p['kind'] == 'dir' and p['copyfrom_revision']
                   and svn_path.startswith(p['path'] + "/")]
        if parents:
            # Use the nearest copy-from'd parent. Every candidate is a prefix
            # of svn_path, so the longest one is the nearest.
            parent = max(parents, key=len)
            for p in paths:
                if parent == p['path']:
                    # Swap only the leading parent prefix for its copy-from
                    # source; a plain str.replace() would also rewrite any
                    # later occurrence of the same text inside the path.
                    info_path = p['copyfrom_path'] + info_path[len(p['path']):]
                    info_rev = p['copyfrom_revision']
                    break
        else:
            # If no parent copy-from's, then we should be able to check this path in
            # the preceding revision.
            info_rev -= 1
    info = get_svn_info(svn_repos_url + info_path, info_rev)
    return info['kind']
98
def parse_svn_log_xml(xml_string):
    """
    Turn the XML output of an "svn log" command into a list of dicts, one per
    <logentry> element.

    Each dict has the keys 'revision', 'author', 'date', 'message',
    'changed_paths' (sorted by path) and 'revprops'.
    """
    entries = []
    root = ET.fromstring(strip_forbidden_xml_chars(xml_string))
    for logentry in root.findall('logentry'):
        record = {'revision': int(logentry.get('revision'))}

        # Some revisions don't have authors, most notably the first revision
        # in a repository.
        # logentry nodes targeting directories protected by path-based
        # authentication have no child nodes at all. An entry is still
        # returned in that case; as it has no path entries, no further
        # processing will be made.
        author_node = logentry.find('author')
        date_node = logentry.find('date')
        msg_node = logentry.find('msg')

        if author_node is not None and author_node.text:
            record['author'] = author_node.text
        else:
            record['author'] = "No author"

        if date_node is None:
            record['date'] = None
        else:
            record['date'] = svn_date_to_timestamp(date_node.text)

        # Normalize every line-ending flavour in the message to '\n'.
        message = ""
        if msg_node is not None and msg_node.text:
            message = msg_node.text.replace('\r\n', '\n').replace('\n\r', '\n').replace('\r', '\n')
        record['message'] = message

        changed = []
        for path_node in logentry.findall('.//paths/path'):
            source_rev = path_node.get('copyfrom-rev')
            if source_rev:
                source_rev = int(source_rev)
            changed.append({
                'path': path_node.text,
                'kind': path_node.get('kind'),
                'action': path_node.get('action'),
                'copyfrom_path': path_node.get('copyfrom-path'),
                'copyfrom_revision': source_rev,
            })
        # Sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
        # can process actions in depth-first order.
        changed.sort(key=operator.itemgetter('path'))
        record['changed_paths'] = changed

        record['revprops'] = [
            {'name': prop_node.get('name'), 'value': prop_node.text}
            for prop_node in logentry.findall('.//revprops/property')
        ]
        entries.append(record)
    return entries
146
def parse_svn_status_xml(xml_string, base_dir=None, ignore_externals=False):
    """
    Turn the XML output of an "svn status" command into a list of dicts, one
    per <entry> element.

    When base_dir is given, each entry path must start with it (compared
    case-normalized); that prefix and any leading path separators are removed
    from the reported path. Entries whose status item is 'external' are left
    out when ignore_externals is true.
    """
    if base_dir:
        base_dir = os.path.normcase(base_dir)
    results = []
    root = ET.fromstring(strip_forbidden_xml_chars(xml_string))
    for node in root.findall('.//entry'):
        rel_path = node.get('path')
        if base_dir is not None:
            assert os.path.normcase(rel_path).startswith(base_dir)
            rel_path = rel_path[len(base_dir):].lstrip('/\\')

        wc_status = node.find('wc-status')
        status = wc_status.get('item')
        if ignore_externals and status == 'external':
            continue

        # An entry without a revision attribute is reported as unversioned.
        revision = wc_status.get('revision')
        if status == 'external':
            entry_type = 'external'
        elif revision is not None:
            entry_type = 'normal'
        else:
            entry_type = 'unversioned'

        results.append({
            'path': rel_path,
            'type': entry_type,
            'status': status,
            'revision': revision,
            'props': wc_status.get('props'),
            'copied': wc_status.get('copied'),
        })
    return results
182
def get_svn_rev(svn_url_or_wc, rev_number):
    """
    Resolve an SVN revision pattern to a discrete rev #, by running
    "svn info -r <rev_number>" on the target and reading back the entry's
    revision.
    """
    command = ['info', '--xml', '-r', rev_number, svn_url_or_wc]
    output = run_svn(command, fail_if_stderr=True)
    return parse_svn_info_xml(output)['revision']
190
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Run "svn info --xml" on the given URL or working copy and return the dict
    built by parse_svn_info_xml().

    When rev_number is given it is used both as the operative revision (-r)
    and as the peg revision ("@<rev_number>" appended to the target).
    """
    if rev_number is None:
        target_args = [svn_url_or_wc]
    else:
        target_args = ["-r", rev_number, svn_url_or_wc+"@"+str(rev_number)]
    output = run_svn(['info', '--xml'] + target_args, fail_if_stderr=True)
    return parse_svn_info_xml(output)
204
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Quietly check out svn_url into checkout_dir, at rev_number when one is
    given. Returns whatever run_svn() returns.
    """
    revision_args = []
    if rev_number is not None:
        revision_args = ['-r', rev_number]
    return run_svn(['checkout', '-q'] + revision_args + [svn_url, checkout_dir])
214
def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Run "svn log --xml" for the revision range rev_start:rev_end, limited to
    'limit' entries, and return the parsed entries (see parse_svn_log_xml()).
    """
    args = ['log', '--xml']
    optional_flags = (
        (stop_on_copy, '--stop-on-copy'),
        (get_changed_paths, '-v'),
        (get_revprops, '--with-all-revprops'),
    )
    for enabled, flag in optional_flags:
        if enabled:
            args.append(flag)
    target = str(svn_url_or_wc)
    args.extend(['-r', '%s:%s' % (rev_start, rev_end)])
    if "@" not in svn_url_or_wc:
        # No peg revision in the target yet: peg it at the larger of the two
        # range ends.
        target = "%s@%s" % (svn_url_or_wc, str(max(rev_start, rev_end)))
    args.extend(['--limit', str(limit), target])
    return parse_svn_log_xml(run_svn(args))
233
def get_svn_status(svn_wc, quiet=False, no_recursive=False):
    """
    Run "svn status --xml --ignore-externals" on the given working copy and
    return the parsed entries (see parse_svn_status_xml()), with paths made
    relative to the working copy.

    quiet selects '-q' instead of '-v'; no_recursive adds '-N'.
    """
    # Canonicalize the path so the prefix stripping in the parser works.
    svn_wc = os.path.abspath(svn_wc)
    args = ['status', '--xml', '--ignore-externals']
    args.append('-q' if quiet else '-v')
    if no_recursive:
        args.append('-N')
    args.append(svn_wc)
    return parse_svn_status_xml(run_svn(args), svn_wc, ignore_externals=True)
249
def get_svn_versioned_files(svn_wc):
    """
    Return the paths of all status entries in the SVN working copy that have
    a non-empty path and are of type 'normal'.
    """
    return [
        entry['path']
        for entry in get_svn_status(svn_wc)
        if entry['path'] and entry['type'] == 'normal'
    ]
259
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Return the first SVN log entry in the requested revision range.

    Raises EmptySVNLog when the range yields no entry at all.
    """
    found = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths, get_revprops)
    if not found:
        raise EmptySVNLog("No SVN log for %s between revisions %s and %s" %
                          (svn_url, rev_start, rev_end))
    return found[0]
269
def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the first log entry after (or at) rev_start, looking no further than
    rev_end, in an SVN branch. The log is fetched with --stop-on-copy.

    get_changed_paths controls whether the entry's changed paths are
    requested from "svn log" (default: yes).

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary (requires get_changed_paths=True).

    Raises EmptySVNLog (from get_one_svn_log_entry()) when there is no entry
    in the range.
    """
    # Honor the caller's get_changed_paths instead of always forcing True.
    return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
281
def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the last log entry before (or at) rev_end, looking no further back
    than rev_start, in an SVN branch. The range is queried in reverse
    (rev_end:rev_start) with --stop-on-copy, so the first entry returned by
    "svn log" is the latest one.

    get_changed_paths controls whether the entry's changed paths are
    requested from "svn log" (default: yes).

    Raises EmptySVNLog (from get_one_svn_log_entry()) when there is no entry
    in the range.
    """
    # Honor the caller's get_changed_paths instead of always forcing True.
    return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
289
290
# Request duration (in seconds, as measured with time.time()) below which the
# chunk length is doubled; above twice this value the chunk length is halved.
log_duration_threshold = 10.0
# Initial chunk length, and the lower bound when shrinking.
log_min_chunk_length = 10
# Upper bound when growing the chunk length.
log_max_chunk_length = 10000

def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Yield the SVN log entries between first_rev and last_rev, in order.

    The log is fetched in chunks whose size adapts to how long each request
    took, so that asking for many entries isn't too nasty to the SVN server.
    A last_rev of "HEAD" is first resolved through get_svn_info().

    NOTE: This chunked log fetching *ONLY* works correctly on paths which
    are known to have existed unbroken in the SVN repository, e.g. /trunk.
    Chunked fetching breaks down if a path existed earlier, then was
    deleted, and later was re-created. For example, if path was created in r5,
    then deleted in r1000, and then later re-created in r5000...
      svn log --stop-on-copy --limit 1 -r 1:50 "path/to/file"
        --> would yield r5, i.e. the _initial_ creation
      svn log --stop-on-copy --limit 1 -r 1:HEAD "path/to/file"
        --> would yield r5000, i.e. the _re-creation_
    In theory this might work if we always search "backwards", searching from
    the end going forward rather than forward going to the end...
    """
    if last_rev == "HEAD":
        last_rev = get_svn_info(svn_url)['revision']
    next_rev = first_rev
    chunk_size = log_min_chunk_length
    while next_rev <= last_rev:
        started = time.time()
        chunk_end = min(last_rev, next_rev + chunk_size)
        batch = run_svn_log(svn_url, next_rev, chunk_end, chunk_size,
                            stop_on_copy, get_changed_paths, get_revprops)
        elapsed = time.time() - started
        if not batch:
            # Nothing in this window: continue right after it.
            next_rev = int(chunk_end) + 1
        else:
            for entry in batch:
                if entry['revision'] > last_rev:
                    break
                yield entry
            # 'entry' is the last one looked at; once it reaches last_rev
            # there is nothing left to fetch.
            if entry['revision'] >= last_rev:
                return
            next_rev = entry['revision'] + 1
        # Adapt chunk length based on measured request duration
        if elapsed < log_duration_threshold:
            chunk_size = min(log_max_chunk_length, int(chunk_size * 2.0))
        elif elapsed > log_duration_threshold * 2:
            chunk_size = max(log_min_chunk_length, int(chunk_size / 2.0))
340
341
# Cached result of get_svn_client_version(); None until the first call.
_svn_client_version = None

def get_svn_client_version():
    """
    Return the SVN client version as a tuple of ints.

    The output of "svn --version -q" is split on '.', and only the components
    made up purely of digits are kept; any other component is silently
    dropped. The result is computed once and cached at module level.
    """
    global _svn_client_version
    if _svn_client_version is not None:
        return _svn_client_version
    raw = run_svn(['--version', '-q']).strip()
    _svn_client_version = tuple(
        int(component) for component in raw.split('.') if component.isdigit()
    )
    return _svn_client_version