]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/svnclient.py
WIP on verify
[svn2svn.git] / svn2svn / svnclient.py
1 """ SVN client functions """
2
3 from shell import run_svn
4 from errors import EmptySVNLog
5
6 import os
7 import time
8 import calendar
9 import operator
10
11 try:
12 from xml.etree import cElementTree as ET
13 except ImportError:
14 try:
15 from xml.etree import ElementTree as ET
16 except ImportError:
17 try:
18 import cElementTree as ET
19 except ImportError:
20 from elementtree import ElementTree as ET
21
# Code points of the control characters forbidden by the XML spec: everything
# below 0x20 except tab (0x09), line feed (0x0A) and carriage return (0x0D).
_forbidden_xml_codes = sorted(set(range(32)) - set([0x09, 0x0A, 0x0D]))

# Original module-level names, kept in case other code refers to them.
_identity_table = "".join(map(chr, range(256)))
_forbidden_xml_chars = "".join(map(chr, _forbidden_xml_codes))

# translate() has two calling conventions: byte strings take
# (table, deletechars), text strings take a single mapping in which a value
# of None means "delete this code point".  Prepare an argument for each.
_forbidden_xml_bytes = bytes(bytearray(_forbidden_xml_codes))
_forbidden_xml_map = dict.fromkeys(_forbidden_xml_codes)


def strip_forbidden_xml_chars(xml_string):
    """
    Given an XML string, strips forbidden characters as per the XML spec.
    (these are all control characters except 0x9, 0xA and 0xD).

    Accepts either a byte string or a text (unicode) string and returns a
    new string of the same type; all other characters are left untouched.
    """
    if isinstance(xml_string, bytes):
        # Byte strings: no translation table, only a set of bytes to delete.
        return xml_string.translate(None, _forbidden_xml_bytes)
    # Text strings: mapping-based translate, None values drop the character.
    return xml_string.translate(_forbidden_xml_map)
34
35
def svn_date_to_timestamp(svn_date):
    """
    Parse an SVN date as read from the XML output and return the corresponding
    timestamp (integer seconds, as computed by calendar.timegm()).

    Fractional seconds and a trailing "Z" are discarded.  The remaining text
    is parsed as "%Y-%m-%dT%H:%M:%S" and treated as UTC; no other timezone
    notation is understood (always UTC, hopefully).

    Raises ValueError (from time.strptime) if the text does not match.
    """
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    # Cut at the first '.' to drop the fractional seconds.  A date written
    # without a fractional part still ends in "Z" at this point, so strip
    # that too rather than letting strptime() choke on it.
    date = svn_date.strip().split('.', 1)[0].rstrip('Z')
    time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(time_tuple)
47
def parse_svn_info_xml(xml_string):
    """
    Parse the XML output from an "svn info" command and extract useful information
    as a dict.

    Keys always set: 'url', 'kind', 'revision' (int), 'repos_url', 'is_wc'.
    Keys set only when the matching element is present: 'repos_uuid',
    'schedule', 'copy-from-url' and 'copy-from-rev' (kept as text),
    'last_changed_rev' (int), 'last_changed_author' and 'last_changed_date'
    (timestamp from svn_date_to_timestamp()).

    Only the first <entry> element found is examined.
    """
    d = {}
    xml_string = strip_forbidden_xml_chars(xml_string)
    tree = ET.fromstring(xml_string)
    entry = tree.find('.//entry')
    d['url'] = entry.find('url').text
    d['kind'] = entry.get('kind')
    d['revision'] = int(entry.get('revision'))
    d['repos_url'] = entry.find('.//repository/root').text
    elem = entry.find('.//repository/uuid')
    if elem is not None:
        d['repos_uuid'] = elem.text
    # A <wc-info> child is what marks the target as a working copy.
    wc_info = entry.find('.//wc-info')
    d['is_wc'] = is_wc = wc_info is not None
    if is_wc:
        d['schedule'] = wc_info.find('schedule').text
        copy_from_url = wc_info.find('copy-from-url')
        if copy_from_url is not None:
            d['copy-from-url'] = copy_from_url.text
            d['copy-from-rev'] = wc_info.find('copy-from-rev').text
    commit = entry.find('commit')
    if commit is not None:
        d['last_changed_rev'] = int(commit.get('revision'))
        author = commit.find('author')
        if author is not None:
            d['last_changed_author'] = author.text
        date = commit.find('date')
        if date is not None:
            d['last_changed_date'] = svn_date_to_timestamp(date.text)
    return d
81
def parse_svn_log_xml(xml_string):
    """
    Parse the XML output from an "svn log" command and extract useful information
    as a list of dicts (one per log changeset).

    Each dict has the keys 'revision' (int), 'author', 'date' (timestamp or
    None), 'message', 'changed_paths' (list of dicts sorted by 'path') and
    'revprops' (list of {'name', 'value'} dicts).
    """
    l = []
    xml_string = strip_forbidden_xml_chars(xml_string)
    tree = ET.fromstring(xml_string)
    for entry in tree.findall('logentry'):
        d = {}
        d['revision'] = int(entry.get('revision'))
        # Some revisions don't have authors, most notably the first revision
        # in a repository.
        # logentry nodes targeting directories protected by path-based
        # authentication have no child nodes at all. We return an entry
        # in that case. Anyway, as it has no path entries, no further
        # processing will be made.
        author = entry.find('author')
        date = entry.find('date')
        msg = entry.find('msg')
        d['author'] = (author.text if author is not None else None) or "No author"
        if date is not None:
            d['date'] = svn_date_to_timestamp(date.text)
        else:
            d['date'] = None
        # An element that is present but empty has text None, so fall back to
        # "" before normalizing line endings instead of calling .replace() on
        # None.
        message = (msg.text if msg is not None else None) or ""
        d['message'] = message.replace('\r\n', '\n').replace('\n\r', '\n').replace('\r', '\n')
        paths = []
        for path in entry.findall('.//paths/path'):
            copyfrom_rev = path.get('copyfrom-rev')
            if copyfrom_rev:
                copyfrom_rev = int(copyfrom_rev)
            paths.append({
                'path': path.text,
                'kind': path.get('kind'),
                'action': path.get('action'),
                'copyfrom_path': path.get('copyfrom-path'),
                'copyfrom_revision': copyfrom_rev,
            })
        # Sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
        # can process actions in depth-first order.
        d['changed_paths'] = sorted(paths, key=operator.itemgetter('path'))
        d['revprops'] = [{'name': prop.get('name'), 'value': prop.text}
                         for prop in entry.findall('.//revprops/property')]
        l.append(d)
    return l
129
def parse_svn_status_xml(xml_string, base_dir=None, ignore_externals=False):
    """
    Turn the XML printed by "svn status" into a list of dicts, one for each
    <entry> element.

    When base_dir is given, every entry path must start with it (compared
    after os.path.normcase()); the prefix and any leading path separators are
    removed from the reported 'path'.  Entries whose status item is
    'external' are dropped when ignore_externals is true.

    Each dict carries 'path', 'type' ('external', 'normal' or 'unversioned'),
    'status', 'revision', 'props' and 'copied'.
    """
    if base_dir:
        base_dir = os.path.normcase(base_dir)
    root = ET.fromstring(strip_forbidden_xml_chars(xml_string))
    results = []
    for node in root.findall('.//entry'):
        rel_path = node.get('path')
        if base_dir is not None:
            assert os.path.normcase(rel_path).startswith(base_dir)
            rel_path = rel_path[len(base_dir):].lstrip('/\\')
        wc_status = node.find('wc-status')
        item = wc_status.get('item')
        is_external = item == 'external'
        if is_external and ignore_externals:
            continue
        rev = wc_status.get('revision')
        # Externals are their own category; otherwise having a revision
        # attribute is what separates versioned from unversioned items.
        if is_external:
            entry_type = 'external'
        elif rev is not None:
            entry_type = 'normal'
        else:
            entry_type = 'unversioned'
        results.append({
            'path': rel_path,
            'type': entry_type,
            'status': item,
            'revision': rev,
            'props': wc_status.get('props'),
            'copied': wc_status.get('copied'),
        })
    return results
165
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Get SVN information for the given URL or working copy, with an optionally
    specified revision number.
    Returns a dict as created by parse_svn_info_xml().

    When rev_number is given it is used both as the operative revision (-r)
    and as the peg revision appended to the target ("target@rev").
    """
    args = ['info', '--xml']
    if rev_number is not None:
        # Stringify once so the argument list contains only strings, whether
        # the caller passed an int or a keyword such as "HEAD".
        rev = str(rev_number)
        args += ["-r", rev, svn_url_or_wc + "@" + rev]
    else:
        args += [svn_url_or_wc]
    xml_string = run_svn(args, fail_if_stderr=True)
    return parse_svn_info_xml(xml_string)
179
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Checkout the given URL at an optional revision number.

    Runs "svn checkout -q [-r REV] URL DIR" through run_svn() and returns
    whatever run_svn() returns.
    """
    args = ['checkout', '-q']
    if rev_number is not None:
        # Keep the argument list homogeneous: revisions may arrive as ints.
        args += ['-r', str(rev_number)]
    args += [svn_url, checkout_dir]
    return run_svn(args)
189
def _later_revision(rev_start, rev_end):
    """
    Return whichever of the two revisions should serve as the peg revision.

    Revisions that both convert to integers are compared numerically (so the
    strings "9" and "10" order correctly); "HEAD" wins over anything else;
    any other combination falls back to a plain max().
    """
    try:
        numeric = (int(rev_start), int(rev_end))
    except (TypeError, ValueError):
        numeric = None
    if numeric is not None:
        return rev_start if numeric[0] >= numeric[1] else rev_end
    if "HEAD" in (rev_start, rev_end):
        return "HEAD"
    return max(rev_start, rev_end)


def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Fetch up to 'limit' SVN log entries between the given revisions.

    Builds an "svn log --xml -r START:END --limit LIMIT" command, adding
    --stop-on-copy, -v and --with-all-revprops according to the flags, and
    returns the list of dicts produced by parse_svn_log_xml().

    If the target does not already contain an "@", the later of the two
    revisions is appended to it as a peg revision.
    """
    args = ['log', '--xml']
    if stop_on_copy:
        args += ['--stop-on-copy']
    if get_changed_paths:
        args += ['-v']
    if get_revprops:
        args += ['--with-all-revprops']
    url = str(svn_url_or_wc)
    args += ['-r', '%s:%s' % (rev_start, rev_end)]
    if "@" not in url:
        url = "%s@%s" % (url, _later_revision(rev_start, rev_end))
    args += ['--limit', str(limit), url]
    xml_string = run_svn(args)
    return parse_svn_log_xml(xml_string)
208
def get_svn_status(svn_wc, quiet=False, no_recursive=False):
    """
    Run "svn status --xml --ignore-externals" on a working copy and return
    the entries as parsed by parse_svn_status_xml().

    quiet selects -q instead of -v; no_recursive adds -N.
    """
    # Work with the absolute path so that the parser can strip it cleanly
    # from the paths svn reports.
    wc_path = os.path.abspath(svn_wc)
    verbosity = '-q' if quiet else '-v'
    cmd = ['status', '--xml', '--ignore-externals', verbosity]
    if no_recursive:
        cmd.append('-N')
    cmd.append(wc_path)
    status_xml = run_svn(cmd)
    return parse_svn_status_xml(status_xml, wc_path, ignore_externals=True)
224
def get_svn_versioned_files(svn_wc):
    """
    Return the paths of all status entries of type 'normal' in the SVN
    working copy, skipping entries whose path is empty.
    """
    return [entry['path']
            for entry in get_svn_status(svn_wc)
            if entry['path'] and entry['type'] == 'normal']
234
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Return the single log entry obtained by querying the revision range with
    a limit of 1.

    Raises EmptySVNLog when the query yields no entries.
    """
    found = run_svn_log(svn_url, rev_start, rev_end, 1,
                        stop_on_copy, get_changed_paths, get_revprops)
    if not found:
        raise EmptySVNLog("No SVN log for %s between revisions %s and %s" %
                          (svn_url, rev_start, rev_end))
    return found[0]
244
def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the first log entry after (or at) rev_start in an SVN branch, looking
    no further than rev_end and not following history across copies
    (--stop-on-copy).

    Passing 0 as rev_start should therefore give you the log entry
    corresponding to the branch creation.

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary (requires get_changed_paths to be true).

    Raises EmptySVNLog (via get_one_svn_log_entry) if the range has no entries.
    """
    return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
256
def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the last log entry before/at rev_end in an SVN branch, looking no
    further back than rev_start and not following history across copies
    (--stop-on-copy).

    Passing "HEAD" as rev_end should therefore give you the log entry
    corresponding to the latest commit in the branch.

    Raises EmptySVNLog (via get_one_svn_log_entry) if the range has no entries.
    """
    # The range is deliberately queried as rev_end:rev_start so that the one
    # entry returned is the one closest to rev_end.
    return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
264
265
# Target duration (in seconds) of one chunked log request, and the smallest
# number of entries ever requested in one chunk.
log_duration_threshold = 10.0
log_min_chunk_length = 10


def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Iterate over SVN log entries between first_rev and last_rev.

    This function features chunked log fetching so that it isn't too nasty
    to the SVN server if many entries are requested.

    last_rev may be the string "HEAD", in which case iteration continues
    until the server returns no further entries.  Yields the dicts produced
    by parse_svn_log_xml(), in the order run_svn_log() returns them.
    """
    # With "HEAD" there is no numeric upper bound to compare revisions to.
    open_ended = (last_rev == "HEAD")
    cur_rev = first_rev
    chunk_length = log_min_chunk_length
    first_run = True
    while open_ended or cur_rev <= last_rev:
        start_t = time.time()
        entries = run_svn_log(svn_url, cur_rev, "HEAD", chunk_length,
                              stop_on_copy, get_changed_paths, get_revprops)
        duration = time.time() - start_t
        if not first_run and entries:
            # skip first revision on subsequent runs, as it is overlapped
            entries.pop(0)
        first_run = False
        if not entries:
            break
        for e in entries:
            if not open_ended and e['revision'] > last_rev:
                break
            yield e
        # 'e' is the last entry examined above; stop once the bound is reached.
        if not open_ended and e['revision'] >= last_rev:
            break
        # The next chunk starts at the last revision seen (hence the overlap).
        cur_rev = e['revision']
        # Adapt chunk length based on measured request duration
        if duration < log_duration_threshold:
            chunk_length = int(chunk_length * 2.0)
        elif duration > log_duration_threshold * 2:
            chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
303
304
# Cached result of get_svn_client_version(); None until the first call.
_svn_client_version = None


def get_svn_client_version():
    """Return the SVN client version as a tuple of ints.

    The output of "svn --version -q" is split on '.', and only the pieces
    made up entirely of digits are kept; anything else is silently dropped.
    The result is computed once and then served from a module-level cache.
    """
    global _svn_client_version
    if _svn_client_version is not None:
        return _svn_client_version
    version_text = run_svn(['--version', '-q']).strip()
    numeric_parts = [int(piece) for piece in version_text.split('.')
                     if piece.isdigit()]
    _svn_client_version = tuple(numeric_parts)
    return _svn_client_version