]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/svnclient.py
e5e8abc38ca4eaf18a3c19e8bad733fbe29ecdef
[svn2svn.git] / svn2svn / svnclient.py
1 """ SVN client functions """
2
3 from shell import run_svn
4 from errors import EmptySVNLog
5
6 import os
7 import time
8 import calendar
9 import operator
10
11 try:
12 from xml.etree import cElementTree as ET
13 except ImportError:
14 try:
15 from xml.etree import ElementTree as ET
16 except ImportError:
17 try:
18 import cElementTree as ET
19 except ImportError:
20 from elementtree import ElementTree as ET
21
# Identity mapping used with the two-argument form of translate(), which is
# only available on byte strings (Python 2 ``str``).
_identity_table = "".join(map(chr, range(256)))
# Control characters forbidden by the XML spec: everything below 0x20 except
# tab (0x09), line feed (0x0A) and carriage return (0x0D).
_forbidden_xml_chars = "".join(
    set(map(chr, range(32))) - set('\x09\x0A\x0D')
)
# Deletion map for the one-argument form of translate() used by text strings:
# each forbidden code point maps to None, which removes it.
_forbidden_xml_ordinals = dict.fromkeys(map(ord, _forbidden_xml_chars))
# The text string type of the running interpreter (``unicode`` on Python 2,
# ``str`` on Python 3).
_text_type = type(u"")


def strip_forbidden_xml_chars(xml_string):
    """
    Given an XML string, strips forbidden characters as per the XML spec.
    (these are all control characters except 0x9, 0xA and 0xD).

    Both byte strings and text strings are accepted; the result has the same
    type as the input.
    """
    if isinstance(xml_string, _text_type):
        # Text strings only support the single-argument (mapping) form of
        # translate(); the two-argument form would raise TypeError.
        return xml_string.translate(_forbidden_xml_ordinals)
    return xml_string.translate(_identity_table, _forbidden_xml_chars)
34
35
def svn_date_to_timestamp(svn_date):
    """
    Convert an SVN date string, as found in the XML output of svn commands,
    into the corresponding timestamp (seconds since the epoch).

    Everything from the first '.' onwards is discarded and the remainder is
    interpreted as a UTC date/time.
    """
    # Drop the fractional seconds and the timezone suffix (always UTC,
    # hopefully).
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    whole_seconds = svn_date.split('.', 1)[0]
    return calendar.timegm(time.strptime(whole_seconds, "%Y-%m-%dT%H:%M:%S"))
47
def parse_svn_info_xml(xml_string):
    """
    Turn the XML produced by "svn info --xml" into a dict.

    Keys: 'url', 'kind', 'revision', 'repos_url', 'repos_uuid',
    'last_changed_rev' and 'last_changed_date' (a timestamp).
    'last_changed_author' is only present when the commit has an author
    element.
    """
    root = ET.fromstring(strip_forbidden_xml_chars(xml_string))
    entry = root.find('.//entry')
    info = {
        'url': entry.find('url').text,
        'kind': entry.get('kind'),
        'revision': int(entry.get('revision')),
        'repos_url': root.find('.//repository/root').text,
        'repos_uuid': root.find('.//repository/uuid').text,
        'last_changed_rev': int(root.find('.//commit').get('revision')),
    }
    # Compare against None explicitly: an Element without children is falsy.
    author = root.find('.//commit/author')
    if author is not None:
        info['last_changed_author'] = author.text
    commit_date = root.find('.//commit/date').text
    info['last_changed_date'] = svn_date_to_timestamp(commit_date)
    return info
68
def parse_svn_log_xml(xml_string):
    """
    Parse the XML output from an "svn log" command and extract useful information
    as a list of dicts (one per log changeset).

    Each dict has the keys 'revision' (int), 'author', 'date' (timestamp or
    None), 'message', 'changed_paths' (list of dicts sorted by path) and
    'revprops' (list of {'name', 'value'} dicts).
    """
    entries = []
    xml_string = strip_forbidden_xml_chars(xml_string)
    tree = ET.fromstring(xml_string)
    for entry in tree.findall('logentry'):
        d = {}
        d['revision'] = int(entry.get('revision'))
        # Some revisions don't have authors, most notably the first revision
        # in a repository.
        # logentry nodes targeting directories protected by path-based
        # authentication have no child nodes at all. We return an entry
        # in that case. Anyway, as it has no path entries, no further
        # processing will be made.
        author = entry.find('author')
        date = entry.find('date')
        msg = entry.find('msg')
        d['author'] = author is not None and author.text or "No author"
        if date is not None:
            d['date'] = svn_date_to_timestamp(date.text)
        else:
            d['date'] = None
        # An empty <msg/> element has text None rather than "", so the text
        # must be checked before calling .replace() on it.
        message = ""
        if msg is not None and msg.text:
            # Normalize all line-ending variants to '\n'.
            message = msg.text.replace('\r\n', '\n').replace('\n\r', '\n').replace('\r', '\n')
        d['message'] = message
        paths = []
        for path in entry.findall('.//paths/path'):
            copyfrom_rev = path.get('copyfrom-rev')
            if copyfrom_rev:
                copyfrom_rev = int(copyfrom_rev)
            paths.append({
                'path': path.text,
                'kind': path.get('kind'),
                'action': path.get('action'),
                'copyfrom_path': path.get('copyfrom-path'),
                'copyfrom_revision': copyfrom_rev,
            })
        # Sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
        # can process actions in depth-first order.
        d['changed_paths'] = sorted(paths, key=operator.itemgetter('path'))
        revprops = []
        for prop in entry.findall('.//revprops/property'):
            revprops.append({'name': prop.get('name'), 'value': prop.text})
        d['revprops'] = revprops
        entries.append(d)
    return entries
116
def parse_svn_status_xml(xml_string, base_dir=None, ignore_externals=False):
    """
    Turn the XML produced by "svn status --xml" into a list of dicts, one
    per status entry.

    When base_dir is given, each entry path must start with it; that prefix
    and any leading path separators are removed from the reported path.
    Entries whose status item is 'external' are skipped when
    ignore_externals is true.
    """
    if base_dir:
        base_dir = os.path.normcase(base_dir)
    root = ET.fromstring(strip_forbidden_xml_chars(xml_string))
    results = []
    for entry in root.findall('.//entry'):
        path = entry.get('path')
        if base_dir is not None:
            assert os.path.normcase(path).startswith(base_dir)
            path = path[len(base_dir):].lstrip('/\\')
        wc_status = entry.find('wc-status')
        status = wc_status.get('item')
        if status == 'external' and ignore_externals:
            continue
        revision = wc_status.get('revision')
        # Classify the entry: externals first, then anything carrying a
        # revision attribute, otherwise unversioned.
        if status == 'external':
            entry_type = 'external'
        elif revision is not None:
            entry_type = 'normal'
        else:
            entry_type = 'unversioned'
        results.append({
            'path': path,
            'type': entry_type,
            'status': status,
            'revision': revision,
            'props': wc_status.get('props'),
            'copied': wc_status.get('copied'),
        })
    return results
152
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Run "svn info --xml" on the given URL or working copy and return the
    dict built by parse_svn_info_xml().

    When rev_number is given it is used both as the -r revision and as the
    "@rev" suffix appended to the target.
    """
    if rev_number is None:
        target_args = [svn_url_or_wc]
    else:
        target_args = ["-r", rev_number, svn_url_or_wc + "@" + str(rev_number)]
    info_xml = run_svn(['info', '--xml'] + target_args, fail_if_stderr=True)
    return parse_svn_info_xml(info_xml)
166
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Run a quiet "svn checkout" of svn_url into checkout_dir, at revision
    rev_number when one is given. Returns whatever run_svn() returns.
    """
    revision_args = []
    if rev_number is not None:
        revision_args = ['-r', rev_number]
    return run_svn(['checkout', '-q'] + revision_args + [svn_url, checkout_dir])
176
def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Run "svn log --xml" for the revision range rev_start:rev_end, limited to
    'limit' entries, and return the list built by parse_svn_log_xml().

    Unless the target already contains an "@", the larger of the two
    revisions is appended to it as an "@rev" suffix.
    """
    cmd = ['log', '--xml']
    optional_flags = (
        (stop_on_copy, '--stop-on-copy'),
        (get_changed_paths, '-v'),
        (get_revprops, '--with-all-revprops'),
    )
    for enabled, flag in optional_flags:
        if enabled:
            cmd.append(flag)
    target = str(svn_url_or_wc)
    cmd.extend(['-r', '%s:%s' % (rev_start, rev_end)])
    if "@" not in svn_url_or_wc:
        target = "%s@%s" % (svn_url_or_wc, str(max(rev_start, rev_end)))
    cmd.extend(['--limit', str(limit), target])
    return parse_svn_log_xml(run_svn(cmd))
195
def get_svn_status(svn_wc, quiet=False, no_recursive=False):
    """
    Run "svn status --xml --ignore-externals" on the given working copy and
    return the list built by parse_svn_status_xml(), with paths made
    relative to the working copy.

    quiet selects -q instead of -v; no_recursive adds -N.
    """
    # Use the absolute path so the prefix can be stripped from the entry
    # paths reliably.
    wc_path = os.path.abspath(svn_wc)
    cmd = ['status', '--xml', '--ignore-externals']
    if quiet:
        cmd.append('-q')
    else:
        cmd.append('-v')
    if no_recursive:
        cmd.append('-N')
    cmd.append(wc_path)
    return parse_svn_status_xml(run_svn(cmd), wc_path, ignore_externals=True)
211
def get_svn_versioned_files(svn_wc):
    """
    Return the paths of all status entries of the working copy whose type is
    'normal' and whose path is non-empty.
    """
    return [
        entry['path']
        for entry in get_svn_status(svn_wc)
        if entry['path'] and entry['type'] == 'normal'
    ]
221
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Return the first log entry that "svn log" reports for the range
    rev_start:rev_end (the request is limited to one entry).

    Raises EmptySVNLog when the range yields no entry.
    """
    found = run_svn_log(svn_url, rev_start, rev_end, 1,
                        stop_on_copy=stop_on_copy,
                        get_changed_paths=get_changed_paths,
                        get_revprops=get_revprops)
    if not found:
        raise EmptySVNLog("No SVN log for %s between revisions %s and %s" %
                          (svn_url, rev_start, rev_end))
    return found[0]
231
def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the first log entry at or after rev_start (and no later than rev_end)
    in an SVN branch.

    The log is fetched with --stop-on-copy, so it does not extend past the
    copy that created the branch.

    get_changed_paths controls whether the changed paths are requested
    (default: True).

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.

    Raises EmptySVNLog if there is no log entry in the range.
    """
    # Forward the caller's choice instead of always requesting changed paths.
    return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
243
def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the last log entry at or before rev_end (and no earlier than
    rev_start) in an SVN branch.

    The range is queried in reverse (rev_end:rev_start) with --stop-on-copy,
    so the first entry returned is the most recent one.

    get_changed_paths controls whether the changed paths are requested
    (default: True).

    Raises EmptySVNLog if there is no log entry in the range.
    """
    # Forward the caller's choice instead of always requesting changed paths.
    return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
251
252
# Tuning knobs for iter_svn_log_entries().
# Request duration (in seconds, as measured with time.time()) used to adapt
# the chunk size: a request faster than this doubles the chunk length, a
# request slower than twice this halves it.
log_duration_threshold = 10.0
# Initial chunk length, and the lower bound when the chunk length is halved.
log_min_chunk_length = 10
255
def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Iterate over SVN log entries between first_rev and last_rev.

    This is a generator yielding the dicts produced by run_svn_log().
    last_rev may be the string "HEAD". first_rev must support integer
    arithmetic, since the chunk length is added to it.

    This function features chunked log fetching so that it isn't too nasty
    to the SVN server if many entries are requested.

    NOTE: This chunked log fetching *ONLY* works correctly on paths which
    are known to have existed unbroken in the SVN repository, e.g. /trunk.
    Chunked fetching breaks down if a path existed earlier, then was
    deleted, and later was re-created. For example, if path was created in r5,
    then deleted in r1000, and then later re-created in r5000...
      svn log --stop-on-copy --limit 1 -r 1:50 "path/to/file"
        --> would yield r5, i.e. the _initial_ creation
      svn log --stop-on-copy --limit 1 -r 1:HEAD "path/to/file"
        --> would yield r5000, i.e. the _re-creation_
    In theory this might work if we always search "backwards", searching from
    the end going forward rather than forward going to the end...
    """
    cur_rev = first_rev
    chunk_length = log_min_chunk_length
    first_run = True
    # NOTE(review): when last_rev is "HEAD", the min() and the revision
    # comparisons below mix str and int, which looks like it relies on
    # Python 2 ordering of mixed types -- confirm.
    while last_rev == "HEAD" or cur_rev <= last_rev:
        start_t = time.time()
        # Each request is bounded both by revision range and by entry count.
        stop_rev = min(last_rev, cur_rev + chunk_length)
        entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length,
                              stop_on_copy, get_changed_paths, get_revprops)
        duration = time.time() - start_t
        if not first_run:
            # skip first revision on subsequent runs, as it is overlapped
            # NOTE(review): assumes the first returned entry is cur_rev; an
            # empty result here would raise IndexError -- confirm.
            entries.pop(0)
        first_run = False
        # Stop as soon as a chunk brings no new entries.
        # NOTE(review): this also ends the iteration when the path simply has
        # no commits within the current chunk's revision range -- confirm
        # this is intended.
        if not entries:
            break
        for e in entries:
            if e['revision'] > last_rev:
                break
            yield e
        # 'e' is the last entry visited by the loop above.
        if e['revision'] >= last_rev:
            break
        # The next chunk starts at (and overlaps with) the last revision seen.
        cur_rev = e['revision']
        # Adapt chunk length based on measured request duration
        if duration < log_duration_threshold:
            chunk_length = int(chunk_length * 2.0)
        elif duration > log_duration_threshold * 2:
            chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
302
303
# Cached result of get_svn_client_version(); None until the first call.
_svn_client_version = None


def get_svn_client_version():
    """Return the SVN client version as a tuple of ints.

    The output of "svn --version -q" is split on '.'; components that are
    not made only of digits are silently dropped. The result is computed
    once and cached at module level.
    """
    global _svn_client_version
    if _svn_client_version is not None:
        return _svn_client_version
    version_text = run_svn(['--version', '-q']).strip()
    numeric_parts = [int(part) for part in version_text.split('.')
                     if part.isdigit()]
    _svn_client_version = tuple(numeric_parts)
    return _svn_client_version