1 """ SVN client functions """
3 from shell
import run_svn
4 from errors
import EmptySVNLog
12 from xml
.etree
import cElementTree
as ET
15 from xml
.etree
import ElementTree
as ET
18 import cElementTree
as ET
20 from elementtree
import ElementTree
as ET
# 256-character identity table: the pass-through mapping expected by the
# two-argument form of str.translate().
_identity_table = "".join(map(chr, range(256)))
# Control characters below 0x20 other than tab (0x09), line feed (0x0A) and
# carriage return (0x0D).  Sorted so the string is identical on every run.
_forbidden_xml_chars = "".join(
    sorted(set(map(chr, range(32))) - set('\x09\x0A\x0D'))
)
def strip_forbidden_xml_chars(xml_string):
    """
    Given an XML string, strips forbidden characters as per the XML spec
    (these are all control characters except 0x9, 0xA and 0xD).

    Accepts either a byte string or a text string and returns a value of
    the same type with the forbidden characters removed.
    """
    # Local import: only this function needs the regex module.
    import re
    # The two-argument str.translate(table, deletechars) form only exists for
    # Python 2 byte strings; a character class performs the same deletion for
    # both bytes and text on every Python version.
    if isinstance(xml_string, bytes):
        return re.sub(b'[\x00-\x08\x0b\x0c\x0e-\x1f]', b'', xml_string)
    return re.sub(u'[\x00-\x08\x0b\x0c\x0e-\x1f]', u'', xml_string)
def svn_date_to_timestamp(svn_date):
    """
    Parse an SVN date as read from the XML output and return the corresponding
    timestamp (seconds since the epoch, interpreting the date as UTC).

    Raises ValueError if the date does not match YYYY-MM-DDTHH:MM:SS.
    """
    # Strip microseconds and timezone (always UTC, hopefully)
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    date = svn_date.split('.', 1)[0]
    # Without a fractional part the split above removes nothing, so the
    # trailing "Z" designator has to be dropped explicitly.
    if date.endswith('Z'):
        date = date[:-1]
    time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(time_tuple)
def parse_svn_info_xml(xml_string):
    """
    Parse the XML output from an "svn info" command and extract useful information
    as a dict.

    The dict has the keys 'url', 'kind', 'revision', 'repos_url', 'repos_uuid',
    'last_changed_rev' and 'last_changed_date', plus 'last_changed_author'
    when the commit element carries an author.
    """
    d = {}
    xml_string = strip_forbidden_xml_chars(xml_string)
    tree = ET.fromstring(xml_string)
    entry = tree.find('.//entry')
    d['url'] = entry.find('url').text
    d['kind'] = entry.get('kind')
    d['revision'] = int(entry.get('revision'))
    d['repos_url'] = tree.find('.//repository/root').text
    d['repos_uuid'] = tree.find('.//repository/uuid').text
    d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
    # The author element is optional, so the key is only set when present.
    author_element = tree.find('.//commit/author')
    if author_element is not None:
        d['last_changed_author'] = author_element.text
    d['last_changed_date'] = svn_date_to_timestamp(tree.find('.//commit/date').text)
    return d
def get_kind(svn_repos_url, svn_path, svn_rev, action, paths):
    """
    Calculate the "kind"-type of a given URL in the SVN repo.

    'paths' is the list of changed-path dicts of the log entry being
    processed (keys 'path', 'kind', 'copyfrom_path', 'copyfrom_revision').
    Returns the 'kind' value reported by get_svn_info().
    """
    # By default, just do a simple "svn info" based on passed-in params.
    info_path = svn_path
    info_rev = svn_rev
    if action == 'D':
        # For deletions, we can't do an "svn info" at this revision.
        # Need to trace ancestry backwards.
        # Build a list of any copy-from's in this log_entry that we're a child of.
        parents = [p['path'] for p in paths
                   if p['kind'] == 'dir' and p['copyfrom_revision']
                   and svn_path.startswith(p['path'] + "/")]
        if parents:
            # Use the nearest copy-from'd parent.  Every candidate is a prefix
            # of svn_path, so the greatest string is also the longest one.
            parent = max(parents)
            for p in paths:
                if parent == p['path']:
                    # Swap only the leading parent prefix; a plain
                    # str.replace() would also rewrite later occurrences of
                    # the same text further down the path.
                    info_path = p['copyfrom_path'] + svn_path[len(parent):]
                    info_rev = p['copyfrom_revision']
        else:
            # If no parent copy-from's, then we should be able to check this path in
            # the preceding revision.
            info_rev -= 1
    info = get_svn_info(svn_repos_url + info_path, info_rev)
    return info['kind']
def parse_svn_log_xml(xml_string):
    """
    Parse the XML output from an "svn log" command and extract useful information
    as a list of dicts (one per log changeset).

    Each dict has the keys 'revision', 'author', 'date_raw', 'date',
    'message', 'changed_paths' and 'revprops'.
    """
    l = []
    xml_string = strip_forbidden_xml_chars(xml_string)
    tree = ET.fromstring(xml_string)
    for entry in tree.findall('logentry'):
        d = {}
        d['revision'] = int(entry.get('revision'))
        # Some revisions don't have authors, most notably the first revision.
        # logentry nodes targeting directories protected by path-based
        # authentication have no child nodes at all. We return an entry
        # in that case. Anyway, as it has no path entries, no further
        # processing will be made.
        author = entry.find('author')
        date = entry.find('date')
        msg = entry.find('msg')
        if author is not None and author.text:
            d['author'] = author.text
        else:
            d['author'] = "No author"
        d['date_raw'] = date.text if date is not None else None
        d['date'] = svn_date_to_timestamp(date.text) if date is not None else None
        if msg is not None and msg.text:
            # Normalise every line-ending flavour to a bare "\n".
            d['message'] = msg.text.replace('\r\n', '\n').replace('\n\r', '\n').replace('\r', '\n')
        else:
            d['message'] = ""
        paths = []
        for path in entry.findall('.//paths/path'):
            copyfrom_rev = path.get('copyfrom-rev')
            if copyfrom_rev:
                copyfrom_rev = int(copyfrom_rev)
            paths.append({
                'path': path.text,
                'kind': path.get('kind'),
                'action': path.get('action'),
                'copyfrom_path': path.get('copyfrom-path'),
                'copyfrom_revision': copyfrom_rev,
            })
        # Sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
        # can process actions in depth-first order.
        d['changed_paths'] = sorted(paths, key=operator.itemgetter('path'))
        d['revprops'] = [{'name': prop.get('name'), 'value': prop.text}
                         for prop in entry.findall('.//revprops/property')]
        l.append(d)
    return l
def parse_svn_status_xml(xml_string, base_dir=None, ignore_externals=False):
    """
    Parse the XML output from an "svn status" command and extract useful info
    as a list of dicts (one per status entry).

    When base_dir is given, entry paths that start with it are returned
    relative to it.  When ignore_externals is true, entries whose status
    item is 'external' are skipped.
    """
    if base_dir:
        base_dir = os.path.normcase(base_dir)
    l = []
    xml_string = strip_forbidden_xml_chars(xml_string)
    tree = ET.fromstring(xml_string)
    for entry in tree.findall('.//entry'):
        d = {}
        path = entry.get('path')
        if base_dir is not None and os.path.normcase(path).startswith(base_dir):
            # Drop the base directory and any separator left at the front.
            path = path[len(base_dir):].lstrip('/\\')
        d['path'] = path
        wc_status = entry.find('wc-status')
        status = wc_status.get('item')
        revision = wc_status.get('revision')
        if status == 'external' and ignore_externals:
            continue
        if status == 'external':
            d['type'] = 'external'
        elif revision is not None:
            d['type'] = 'normal'
        else:
            d['type'] = 'unversioned'
        d['status'] = status
        d['revision'] = revision
        d['props'] = wc_status.get('props')
        d['copied'] = wc_status.get('copied')
        l.append(d)
    return l
def get_svn_rev(svn_url_or_wc, rev_number):
    """
    Evaluate a given SVN revision pattern, to map it to a discrete rev #.

    Runs "svn info -r <rev_number>" on the target and returns the integer
    'revision' value parsed from its XML output.
    """
    # Command-line arguments must be strings; rev_number may be an int.
    args = ['info', '--xml', '-r', str(rev_number), svn_url_or_wc]
    xml_string = run_svn(args, fail_if_stderr=True)
    info = parse_svn_info_xml(xml_string)
    return info['revision']
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Get SVN information for the given URL or working copy, with an optionally
    specified revision number.
    Returns a dict as created by parse_svn_info_xml().
    """
    args = ['info', '--xml']
    if rev_number is not None:
        # Use the revision both as operative revision (-r) and as peg
        # revision (@rev) on the target.
        rev = str(rev_number)
        args += ["-r", rev, svn_url_or_wc + "@" + rev]
    else:
        args += [svn_url_or_wc]
    xml_string = run_svn(args, fail_if_stderr=True)
    return parse_svn_info_xml(xml_string)
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Checkout the given URL at an optional revision number.

    Returns whatever run_svn() returns for the quiet checkout command.
    """
    args = ['checkout', '-q']
    if rev_number is not None:
        # Command-line arguments must be strings; rev_number may be an int.
        args += ['-r', str(rev_number)]
    args += [svn_url, checkout_dir]
    return run_svn(args)
def _pick_peg_revision(rev_start, rev_end):
    """
    Return whichever end of a revision range is the later one, for use as
    a peg revision.
    """
    def as_number(rev):
        try:
            return int(rev)
        except (TypeError, ValueError):
            return None

    start_num = as_number(rev_start)
    end_num = as_number(rev_end)
    if start_num is not None and end_num is not None:
        # Compare numerically so that e.g. "10" beats "9".
        return rev_start if start_num >= end_num else rev_end
    if start_num is None and end_num is None:
        return max(str(rev_start), str(rev_end))
    # Exactly one side is symbolic (e.g. "HEAD"): prefer it over the number.
    return rev_start if start_num is None else rev_end


def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Fetch up to 'limit' SVN log entries between the given revisions.

    Returns the list of dicts produced by parse_svn_log_xml().
    """
    args = ['log', '--xml']
    if stop_on_copy:
        args += ['--stop-on-copy']
    if get_changed_paths:
        args += ['-v']
    if get_revprops:
        args += ['--with-all-revprops']
    url = str(svn_url_or_wc)
    args += ['-r', '%s:%s' % (rev_start, rev_end)]
    if "@" not in svn_url_or_wc:
        # No explicit peg revision on the target: pin it to the later end
        # of the requested range.
        url = "%s@%s" % (svn_url_or_wc, _pick_peg_revision(rev_start, rev_end))
    args += ['--limit', str(limit), url]
    xml_string = run_svn(args)
    return parse_svn_log_xml(xml_string)
def get_svn_status(svn_wc, quiet=False, no_recursive=False):
    """
    Get SVN status information about the given working copy.

    Returns the list of dicts produced by parse_svn_status_xml(), with
    paths relative to the working copy and externals ignored.
    """
    # Ensure proper stripping by canonicalizing the path
    svn_wc = os.path.abspath(svn_wc)
    args = ['status', '--xml', '--ignore-externals']
    # NOTE(review): the quiet/no_recursive flag handling below is assumed
    # from the parameter names -- confirm against the intended svn options.
    if quiet:
        args += ['-q']
    else:
        args += ['-v']
    if no_recursive:
        args += ['-N']
    xml_string = run_svn(args + [svn_wc])
    return parse_svn_status_xml(xml_string, svn_wc, ignore_externals=True)
def get_svn_versioned_files(svn_wc):
    """
    Get the list of versioned files in the SVN working copy.

    Returns the non-empty paths of all status entries whose type is 'normal'.
    """
    return [e['path'] for e in get_svn_status(svn_wc)
            if e['path'] and e['type'] == 'normal']
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Get the first SVN log entry in the requested revision range.

    Raises EmptySVNLog when the range contains no log entry.
    """
    entries = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths, get_revprops)
    if entries:
        return entries[0]
    raise EmptySVNLog("No SVN log for %s between revisions %s and %s" %
        (svn_url, rev_start, rev_end))
def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the first log entry after (or at) the given revision number in an SVN branch.
    Passing 0 as rev_start gives the log entry corresponding to the branch
    creation.

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    # Forward the caller's get_changed_paths instead of forcing it to True.
    return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the last log entry before/at the given revision number in an SVN branch.
    Passing "HEAD" as rev_end gives the log entry corresponding to the latest
    commit in the branch.
    """
    # The range is passed reversed (rev_end first) so that the single entry
    # fetched is the latest one.  get_changed_paths is forwarded instead of
    # being forced to True.
    return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
# Tuning knobs for the chunked fetching done by iter_svn_log_entries().
# Request duration (seconds) below which the chunk length is doubled; it is
# halved once a request takes more than twice this long.
log_duration_threshold = 10.0
# Lower and upper bounds for the chunk length.
log_min_chunk_length = 10
log_max_chunk_length = 10000
def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Iterate over SVN log entries between first_rev and last_rev.

    This function features chunked log fetching so that it isn't too nasty
    to the SVN server if many entries are requested.

    NOTE: This chunked log fetching *ONLY* works correctly on paths which
    are known to have existed unbroken in the SVN repository, e.g. /trunk.
    Chunked fetching breaks down if a path existed earlier, then was
    deleted, and later was re-created. For example, if path was created in r5,
    then deleted in r1000, and then later re-created in r5000...
      svn log --stop-on-copy --limit 1 -r 1:50 "path/to/file"
        --> would yield r5, i.e. the _initial_ creation
      svn log --stop-on-copy --limit 1 -r 1:HEAD "path/to/file"
        --> would yield r5000, i.e. the _re-creation_
    In theory this might work if we always search "backwards", searching from
    the end going forward rather than forward going to the end...
    """
    if last_rev == "HEAD":
        # Resolve HEAD to a concrete revision so it can be compared below.
        info = get_svn_info(svn_url)
        last_rev = info['revision']
    cur_rev = first_rev
    chunk_length = log_min_chunk_length
    while cur_rev <= last_rev:
        start_t = time.time()
        stop_rev = min(last_rev, cur_rev + chunk_length)
        entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length,
                              stop_on_copy, get_changed_paths, get_revprops)
        duration = time.time() - start_t
        if entries:
            for e in entries:
                if e['revision'] > last_rev:
                    break
                yield e
            # 'e' is the last entry examined in this chunk.
            if e['revision'] >= last_rev:
                break
            cur_rev = e['revision'] + 1
        else:
            cur_rev = int(stop_rev) + 1
        # Adapt chunk length based on measured request duration
        if duration < log_duration_threshold:
            chunk_length = min(log_max_chunk_length, int(chunk_length * 2.0))
        elif duration > log_duration_threshold * 2:
            chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
# Cached result of get_svn_client_version(); None until first requested.
_svn_client_version = None


def get_svn_client_version():
    """
    Returns the SVN client version as a tuple.

    The returned tuple only contains numbers, non-digits in version string are
    silently ignored.  The value is computed once and then cached in
    _svn_client_version.
    """
    global _svn_client_version
    if _svn_client_version is None:
        raw = run_svn(['--version', '-q']).strip()
        # Keep only the purely numeric dot-separated components.
        _svn_client_version = tuple(int(x) for x in raw.split('.')
                                    if x.isdigit())
    return _svn_client_version
def parse_svn_propget_xml(xml_string):
    """
    Parse the XML output from an "svn propget" command and extract useful
    information as a dict.

    The dict has the keys 'name' and 'value'.  When the output contains no
    property element, 'name' is None and 'value' is the empty string.
    """
    d = {}
    xml_string = strip_forbidden_xml_chars(xml_string)
    tree = ET.fromstring(xml_string)
    prop = tree.find('.//property')
    # Guard the name lookup the same way as the value lookup: find() returns
    # None when there is no property element.
    d['name'] = prop.get('name') if prop is not None else None
    if prop is not None and prop.text:
        # Normalise every line-ending flavour to a bare "\n".
        d['value'] = prop.text.replace('\r\n', '\n').replace('\n\r', '\n').replace('\r', '\n')
    else:
        d['value'] = ""
    return d
def parse_svn_proplist_xml(xml_string):
    """
    Parse the XML output from an "svn proplist" command and extract the list
    of property names, in document order.
    """
    xml_string = strip_forbidden_xml_chars(xml_string)
    tree = ET.fromstring(xml_string)
    return [prop.get('name') for prop in tree.findall('.//property')]
def get_prop_value(svn_url_or_wc, prop_name, rev_number=None):
    """
    Get the value of a versioned property for the given path.

    Returns the dict produced by parse_svn_propget_xml().
    """
    args = ['propget', '--xml']
    url = str(svn_url_or_wc)
    if rev_number:
        # Command-line arguments must be strings; rev_number may be an int.
        args += ['-r', str(rev_number)]
        if "@" not in svn_url_or_wc:
            # No explicit peg revision on the target: pin it to rev_number.
            url = "%s@%s" % (svn_url_or_wc, str(rev_number))
    args += [prop_name, url]
    xml_string = run_svn(args)
    return parse_svn_propget_xml(xml_string)
def get_all_props(svn_url_or_wc, rev_number=None):
    """
    Get the values of all versioned properties for the given path.

    Returns a dict mapping each property name to its value.
    """
    l = {}
    args = ['proplist', '--xml']
    url = str(svn_url_or_wc)
    if rev_number:
        # Command-line arguments must be strings; rev_number may be an int.
        args += ['-r', str(rev_number)]
        if "@" not in svn_url_or_wc:
            # No explicit peg revision on the target: pin it to rev_number.
            url = "%s@%s" % (svn_url_or_wc, str(rev_number))
    args += [url]
    xml_string = run_svn(args)
    props = parse_svn_proplist_xml(xml_string)
    # One propget call per listed name to retrieve the values.
    for prop_name in props:
        d = get_prop_value(svn_url_or_wc, prop_name, rev_number)
        l[d['name']] = d['value']
    return l