1 """ SVN client functions """
3 from shell
import run_svn
4 from errors
import EmptySVNLog
12 from xml
.etree
import cElementTree
as ET
15 from xml
.etree
import ElementTree
as ET
18 import cElementTree
as ET
20 from elementtree
import ElementTree
as ET
22 _identity_table
= "".join(map(chr, range(256)))
23 _forbidden_xml_chars
= "".join(
24 set(map(chr, range(32))) - set('\x09\x0A\x0D')
28 def strip_forbidden_xml_chars(xml_string
):
30 Given an XML string, strips forbidden characters as per the XML spec.
31 (these are all control characters except 0x9, 0xA and 0xD).
33 return xml_string
.translate(_identity_table
, _forbidden_xml_chars
)
def svn_date_to_timestamp(svn_date):
    """
    Convert an SVN date string, as read from SVN's XML output, into the
    corresponding timestamp (whole seconds since the epoch, taken as UTC).
    """
    # Everything from the first '.' onwards is the fractional seconds plus
    # the timezone marker (always UTC, hopefully); only the part before it
    # is parsed.
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    whole_seconds = svn_date.split('.', 1)[0]
    parsed = time.strptime(whole_seconds, "%Y-%m-%dT%H:%M:%S")
    # timegm() interprets the struct_time as UTC, unlike time.mktime().
    return calendar.timegm(parsed)
48 def parse_svn_info_xml(xml_string
):
50 Parse the XML output from an "svn info" command and extract useful information
54 xml_string
= strip_forbidden_xml_chars(xml_string
)
55 tree
= ET
.fromstring(xml_string
)
56 entry
= tree
.find('.//entry')
57 d
['url'] = entry
.find('url').text
58 d
['kind'] = entry
.get('kind')
59 d
['revision'] = int(entry
.get('revision'))
60 d
['repos_url'] = tree
.find('.//repository/root').text
61 d
['repos_uuid'] = tree
.find('.//repository/uuid').text
62 d
['last_changed_rev'] = int(tree
.find('.//commit').get('revision'))
63 author_element
= tree
.find('.//commit/author')
64 if author_element
is not None:
65 d
['last_changed_author'] = author_element
.text
66 d
['last_changed_date'] = svn_date_to_timestamp(tree
.find('.//commit/date').text
)
69 def _get_kind(svn_repos_url
, svn_path
, svn_rev
, action
, paths
):
71 Calculate the "kind"-type of a given URL in the SVN repo.
73 # By default, just do a simple "svn info" based on passed-in params.
77 # For deletions, we can't do an "svn info" at this revision.
78 # Need to trace ancestry backwards.
81 # Build a list of any copy-from's in this log_entry that we're a child of.
82 if p
['copyfrom_revision'] and svn_path
.startswith(p
['path']):
83 parents
.append(p
['path'])
85 # Use the nearest copy-from'd parent
87 parent
= parents
[len(parents
)-1]
89 if parent
== p
['path']:
90 info_path
= p
['copyfrom_path']
91 info_rev
= p
['copyfrom_revision']
93 # If no parent copy-from's, then we should be able to check this path in
94 # the preceding revision.
96 info
= get_svn_info(svn_repos_url
+info_path
, info_rev
)
99 def parse_svn_log_xml(xml_string
, svn_url_or_wc
):
101 Parse the XML output from an "svn log" command and extract useful information
102 as a list of dicts (one per log changeset).
107 xml_string
= strip_forbidden_xml_chars(xml_string
)
108 tree
= ET
.fromstring(xml_string
)
109 for entry
in tree
.findall('logentry'):
111 d
['revision'] = int(entry
.get('revision'))
113 info
= get_svn_info(svn_url_or_wc
, d
['revision'])
114 svn_repos_url
= info
['repos_url']
115 # Some revisions don't have authors, most notably the first revision
117 # logentry nodes targeting directories protected by path-based
118 # authentication have no child nodes at all. We return an entry
119 # in that case. Anyway, as it has no path entries, no further
120 # processing will be made.
121 author
= entry
.find('author')
122 date
= entry
.find('date')
123 msg
= entry
.find('msg')
124 d
['author'] = author
is not None and author
.text
or "No author"
126 d
['date'] = svn_date_to_timestamp(date
.text
)
129 d
['message'] = msg
is not None and msg
.text
and msg
.text
.replace('\r\n', '\n').replace('\n\r', '\n').replace('\r', '\n') or ""
131 for path
in entry
.findall('.//paths/path'):
132 copyfrom_rev
= path
.get('copyfrom-rev')
134 copyfrom_rev
= int(copyfrom_rev
)
136 kind
= path
.get('kind')
137 action
= path
.get('action')
139 kind
= _get_kind(svn_repos_url
, cur_path
, d
['revision'], action
, paths
)
140 assert (kind
== 'file') or (kind
== 'dir')
145 'copyfrom_path': path
.get('copyfrom-path'),
146 'copyfrom_revision': copyfrom_rev
,
148 # Sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
149 # can process actions in depth-first order.
150 d
['changed_paths'] = sorted(paths
, key
=operator
.itemgetter('path'))
152 for prop
in entry
.findall('.//revprops/property'):
153 revprops
.append({ 'name': prop.get('name'), 'value': prop.text }
)
154 d
['revprops'] = revprops
158 def parse_svn_status_xml(xml_string
, base_dir
=None, ignore_externals
=False):
160 Parse the XML output from an "svn status" command and extract useful info
161 as a list of dicts (one per status entry).
164 base_dir
= os
.path
.normcase(base_dir
)
166 xml_string
= strip_forbidden_xml_chars(xml_string
)
167 tree
= ET
.fromstring(xml_string
)
168 for entry
in tree
.findall('.//entry'):
170 path
= entry
.get('path')
171 if base_dir
is not None:
172 assert os
.path
.normcase(path
).startswith(base_dir
)
173 path
= path
[len(base_dir
):].lstrip('/\\')
175 wc_status
= entry
.find('wc-status')
176 if wc_status
.get('item') == 'external':
179 status
= wc_status
.get('item')
180 revision
= wc_status
.get('revision')
181 if status
== 'external':
182 d
['type'] = 'external'
183 elif revision
is not None:
186 d
['type'] = 'unversioned'
188 d
['revision'] = revision
189 d
['props'] = wc_status
.get('props')
190 d
['copied'] = wc_status
.get('copied')
194 def get_svn_info(svn_url_or_wc
, rev_number
=None):
196 Get SVN information for the given URL or working copy, with an optionally
197 specified revision number.
198 Returns a dict as created by parse_svn_info_xml().
200 args
= ['info', '--xml']
201 if rev_number
is not None:
202 args
+= ["-r", rev_number
, svn_url_or_wc
+"@"+str(rev_number
)]
204 args
+= [svn_url_or_wc
]
205 xml_string
= run_svn(args
, fail_if_stderr
=True)
206 return parse_svn_info_xml(xml_string
)
208 def svn_checkout(svn_url
, checkout_dir
, rev_number
=None):
210 Checkout the given URL at an optional revision number.
212 args
= ['checkout', '-q']
213 if rev_number
is not None:
214 args
+= ['-r', rev_number
]
215 args
+= [svn_url
, checkout_dir
]
218 def run_svn_log(svn_url_or_wc
, rev_start
, rev_end
, limit
, stop_on_copy
=False, get_changed_paths
=True, get_revprops
=False):
220 Fetch up to 'limit' SVN log entries between the given revisions.
222 args
= ['log', '--xml']
224 args
+= ['--stop-on-copy']
225 if get_changed_paths
:
228 args
+= ['--with-all-revprops']
229 url
= str(svn_url_or_wc
)
230 args
+= ['-r', '%s:%s' % (rev_start
, rev_end
)]
231 if not "@" in svn_url_or_wc
:
232 url
= "%s@%s" % (svn_url_or_wc
, str(max(rev_start
, rev_end
)))
233 args
+= ['--limit', str(limit
), url
]
234 xml_string
= run_svn(args
)
235 return parse_svn_log_xml(xml_string
, svn_url_or_wc
)
237 def get_svn_status(svn_wc
, quiet
=False, no_recursive
=False):
239 Get SVN status information about the given working copy.
241 # Ensure proper stripping by canonicalizing the path
242 svn_wc
= os
.path
.abspath(svn_wc
)
243 args
= ['status', '--xml', '--ignore-externals']
250 xml_string
= run_svn(args
+ [svn_wc
])
251 return parse_svn_status_xml(xml_string
, svn_wc
, ignore_externals
=True)
253 def get_svn_versioned_files(svn_wc
):
255 Get the list of versioned files in the SVN working copy.
258 for e
in get_svn_status(svn_wc
):
259 if e
['path'] and e
['type'] == 'normal':
260 contents
.append(e
['path'])
263 def get_one_svn_log_entry(svn_url
, rev_start
, rev_end
, stop_on_copy
=False, get_changed_paths
=True, get_revprops
=False):
265 Get the first SVN log entry in the requested revision range.
267 entries
= run_svn_log(svn_url
, rev_start
, rev_end
, 1, stop_on_copy
, get_changed_paths
, get_revprops
)
270 raise EmptySVNLog("No SVN log for %s between revisions %s and %s" %
271 (svn_url
, rev_start
, rev_end
))
def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the first log entry after (or at) the given revision number in an SVN branch.

    The range is handed to get_one_svn_log_entry() as rev_start..rev_end with
    stop_on_copy enabled, so with a low rev_start (e.g. 0) the result is the
    log entry corresponding to the branch creation.

    get_changed_paths is forwarded to get_one_svn_log_entry().

    Any exception raised by get_one_svn_log_entry() propagates to the caller.

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    return get_one_svn_log_entry(svn_url, rev_start, rev_end,
                                 stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the last log entry before/at the given revision number in an SVN branch.

    The range is handed to get_one_svn_log_entry() reversed (rev_end first,
    rev_start second) with stop_on_copy enabled, so the single entry fetched
    is the one closest to rev_end. With rev_end set to "HEAD" this is the
    log entry corresponding to the latest commit in the branch.

    get_changed_paths is forwarded to get_one_svn_log_entry().

    Any exception raised by get_one_svn_log_entry() propagates to the caller.
    """
    return get_one_svn_log_entry(svn_url, rev_end, rev_start,
                                 stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
# Tuning values for the chunked log fetching done by iter_svn_log_entries().

# Reference duration, in seconds, of a single log request: a request faster
# than this doubles the next chunk length, a request slower than twice this
# halves it.
log_duration_threshold = 10.0
# Chunk length used for the first request, and the floor when shrinking.
log_min_chunk_length = 10
# Ceiling for the chunk length when growing.
log_max_chunk_length = 10000
298 def iter_svn_log_entries(svn_url
, first_rev
, last_rev
, stop_on_copy
=False, get_changed_paths
=True, get_revprops
=False):
300 Iterate over SVN log entries between first_rev and last_rev.
302 This function features chunked log fetching so that it isn't too nasty
303 to the SVN server if many entries are requested.
305 NOTE: This chunked log fetching *ONLY* works correctly on paths which
306 are known to have existed unbroken in the SVN repository, e.g. /trunk.
307 Chunked fetching breaks down if a path existed earlier, then was
308 deleted, and later was re-created. For example, if path was created in r5,
309 then deleted in r1000, and then later re-created in r5000...
310 svn log --stop-on-copy --limit 1 -r 1:50 "path/to/file"
311 --> would yield r5, i.e. the _initial_ creation
312 svn log --stop-on-copy --limit 1 -r 1:HEAD "path/to/file"
313 --> would yield r5000, i.e. the _re-creation_
314 In theory this might work if we always search "backwards", searching from
315 the end going forward rather than forward going to the end...
317 if last_rev
== "HEAD":
318 info
= get_svn_info(svn_url
)
319 last_rev
= info
['revision']
321 chunk_length
= log_min_chunk_length
322 while cur_rev
<= last_rev
:
323 start_t
= time
.time()
324 stop_rev
= min(last_rev
, cur_rev
+ chunk_length
)
325 entries
= run_svn_log(svn_url
, cur_rev
, stop_rev
, chunk_length
,
326 stop_on_copy
, get_changed_paths
, get_revprops
)
327 duration
= time
.time() - start_t
330 if e
['revision'] > last_rev
:
333 if e
['revision'] >= last_rev
:
335 cur_rev
= e
['revision']+1
337 cur_rev
= int(stop_rev
)+1
338 # Adapt chunk length based on measured request duration
339 if duration
< log_duration_threshold
:
340 chunk_length
= min(log_max_chunk_length
, int(chunk_length
* 2.0))
341 elif duration
> log_duration_threshold
* 2:
342 chunk_length
= max(log_min_chunk_length
, int(chunk_length
/ 2.0))
345 _svn_client_version
= None
347 def get_svn_client_version():
349 Returns the SVN client version as a tuple.
351 The returned tuple only contains numbers, non-digits in version string are
354 global _svn_client_version
355 if _svn_client_version
is None:
356 raw
= run_svn(['--version', '-q']).strip()
357 _svn_client_version
= tuple(map(int, [x
for x
in raw
.split('.')
359 return _svn_client_version