""" SVN client functions """

from shell import run_svn
from errors import EmptySVNLog

import calendar
import operator
import os
import time

# Prefer the C-accelerated ElementTree, falling back to whichever
# implementation this interpreter provides.
try:
    from xml.etree import cElementTree as ET
except ImportError:
    try:
        from xml.etree import ElementTree as ET
    except ImportError:
        try:
            import cElementTree as ET
        except ImportError:
            from elementtree import ElementTree as ET

  22 _identity_table 
= "".join(map(chr, range(256))) 
  23 _forbidden_xml_chars 
= "".join( 
  24     set(map(chr, range(32))) - set('\x09\x0A\x0D') 
  28 def strip_forbidden_xml_chars(xml_string
): 
  30     Given an XML string, strips forbidden characters as per the XML spec. 
  31     (these are all control characters except 0x9, 0xA and 0xD). 
  33     return xml_string
.translate(_identity_table
, _forbidden_xml_chars
) 
  36 def svn_date_to_timestamp(svn_date
): 
  38     Parse an SVN date as read from the XML output and return the corresponding 
  41     # Strip microseconds and timezone (always UTC, hopefully) 
  42     # XXX there are various ISO datetime parsing routines out there, 
  43     # cf. http://seehuhn.de/comp/pdate 
  44     date 
= svn_date
.split('.', 2)[0] 
  45     time_tuple 
= time
.strptime(date
, "%Y-%m-%dT%H:%M:%S") 
  46     return calendar
.timegm(time_tuple
) 
  48 def parse_svn_info_xml(xml_string
): 
  50     Parse the XML output from an "svn info" command and extract useful information 
  54     xml_string 
= strip_forbidden_xml_chars(xml_string
) 
  55     tree 
= ET
.fromstring(xml_string
) 
  56     entry 
= tree
.find('.//entry') 
  57     d
['url'] = entry
.find('url').text
 
  58     d
['kind'] = entry
.get('kind') 
  59     d
['revision'] = int(entry
.get('revision')) 
  60     d
['repos_url'] = tree
.find('.//repository/root').text
 
  61     d
['repos_uuid'] = tree
.find('.//repository/uuid').text
 
  62     d
['last_changed_rev'] = int(tree
.find('.//commit').get('revision')) 
  63     author_element 
= tree
.find('.//commit/author') 
  64     if author_element 
is not None: 
  65         d
['last_changed_author'] = author_element
.text
 
  66     d
['last_changed_date'] = svn_date_to_timestamp(tree
.find('.//commit/date').text
) 
  69 def get_kind(svn_repos_url
, svn_path
, svn_rev
, action
, paths
): 
  71     Calculate the "kind"-type of a given URL in the SVN repo. 
  73     # By default, just do a simple "svn info" based on passed-in params. 
  77         # For deletions, we can't do an "svn info" at this revision. 
  78         # Need to trace ancestry backwards. 
  81             # Build a list of any copy-from's in this log_entry that we're a child of. 
  82             if p
['kind'] == 'dir' and p
['copyfrom_revision'] and svn_path
.startswith(p
['path']+"/"): 
  83                 parents
.append(p
['path']) 
  85             # Use the nearest copy-from'd parent 
  87             parent 
= parents
[len(parents
)-1] 
  89                 if parent 
== p
['path']: 
  90                     info_path 
= info_path
.replace(p
['path'], p
['copyfrom_path']) 
  91                     info_rev 
=  p
['copyfrom_revision'] 
  93             # If no parent copy-from's, then we should be able to check this path in 
  94             # the preceeding revision. 
  96     info 
= get_svn_info(svn_repos_url
+info_path
, info_rev
) 
  99 def parse_svn_log_xml(xml_string
): 
 101     Parse the XML output from an "svn log" command and extract useful information 
 102     as a list of dicts (one per log changeset). 
 105     xml_string 
= strip_forbidden_xml_chars(xml_string
) 
 106     tree 
= ET
.fromstring(xml_string
) 
 107     for entry 
in tree
.findall('logentry'): 
 109         d
['revision'] = int(entry
.get('revision')) 
 110         # Some revisions don't have authors, most notably the first revision 
 112         # logentry nodes targeting directories protected by path-based 
 113         # authentication have no child nodes at all. We return an entry 
 114         # in that case. Anyway, as it has no path entries, no further 
 115         # processing will be made. 
 116         author 
= entry
.find('author') 
 117         date 
= entry
.find('date') 
 118         msg 
= entry
.find('msg') 
 119         d
['author'] = author 
is not None and author
.text 
or "No author" 
 120         d
['date_raw'] = date
.text 
if date 
is not None else None 
 121         d
['date'] = svn_date_to_timestamp(date
.text
) if date 
is not None else None 
 122         d
['message'] = msg 
is not None and msg
.text 
and msg
.text
.replace('\r\n', '\n').replace('\n\r', '\n').replace('\r', '\n') or "" 
 124         for path 
in entry
.findall('.//paths/path'): 
 125             copyfrom_rev 
= path
.get('copyfrom-rev') 
 127                 copyfrom_rev 
= int(copyfrom_rev
) 
 130                 'kind': path
.get('kind'), 
 131                 'action': path
.get('action'), 
 132                 'copyfrom_path': path
.get('copyfrom-path'), 
 133                 'copyfrom_revision': copyfrom_rev
, 
 135         # Sort paths (i.e. into hierarchical order), so that process_svn_log_entry() 
 136         # can process actions in depth-first order. 
 137         d
['changed_paths'] = sorted(paths
, key
=operator
.itemgetter('path')) 
 139         for prop 
in entry
.findall('.//revprops/property'): 
 140             revprops
.append({ 'name': prop.get('name'), 'value': prop.text }
) 
 141         d
['revprops'] = revprops
 
 145 def parse_svn_status_xml(xml_string
, base_dir
=None, ignore_externals
=False): 
 147     Parse the XML output from an "svn status" command and extract useful info 
 148     as a list of dicts (one per status entry). 
 151         base_dir 
= os
.path
.normcase(base_dir
) 
 153     xml_string 
= strip_forbidden_xml_chars(xml_string
) 
 154     tree 
= ET
.fromstring(xml_string
) 
 155     for entry 
in tree
.findall('.//entry'): 
 157         path 
= entry
.get('path') 
 158         if base_dir 
is not None and os
.path
.normcase(path
).startswith(base_dir
): 
 159             path 
= path
[len(base_dir
):].lstrip('/\\') 
 161         wc_status 
= entry
.find('wc-status') 
 162         if wc_status
.get('item') == 'external': 
 165         status 
=   wc_status
.get('item') 
 166         revision 
= wc_status
.get('revision') 
 167         if status 
== 'external': 
 168             d
['type'] = 'external' 
 169         elif revision 
is not None: 
 172             d
['type'] = 'unversioned' 
 174         d
['revision'] = revision
 
 175         d
['props'] =    wc_status
.get('props') 
 176         d
['copied'] =   wc_status
.get('copied') 
 180 def get_svn_rev(svn_url_or_wc
, rev_number
): 
 182     Evaluate a given SVN revision pattern, to map it to a discrete rev #. 
 184     xml_string 
= run_svn(['info', '--xml', '-r', rev_number
, svn_url_or_wc
], fail_if_stderr
=True) 
 185     info 
= parse_svn_info_xml(xml_string
) 
 186     return info
['revision'] 
 188 def get_svn_info(svn_url_or_wc
, rev_number
=None): 
 190     Get SVN information for the given URL or working copy, with an optionally 
 191     specified revision number. 
 192     Returns a dict as created by parse_svn_info_xml(). 
 194     args 
= ['info', '--xml'] 
 195     if rev_number 
is not None: 
 196         args 
+= ["-r", rev_number
, svn_url_or_wc
+"@"+str(rev_number
)] 
 198         args 
+= [svn_url_or_wc
] 
 199     xml_string 
= run_svn(args
, fail_if_stderr
=True) 
 200     return parse_svn_info_xml(xml_string
) 
 202 def svn_checkout(svn_url
, checkout_dir
, rev_number
=None): 
 204     Checkout the given URL at an optional revision number. 
 206     args 
= ['checkout', '-q'] 
 207     if rev_number 
is not None: 
 208         args 
+= ['-r', rev_number
] 
 209     args 
+= [svn_url
, checkout_dir
] 
 212 def run_svn_log(svn_url_or_wc
, rev_start
, rev_end
, limit
, stop_on_copy
=False, get_changed_paths
=True, get_revprops
=False): 
 214     Fetch up to 'limit' SVN log entries between the given revisions. 
 216     args 
= ['log', '--xml'] 
 218         args 
+= ['--stop-on-copy'] 
 219     if get_changed_paths
: 
 222         args 
+= ['--with-all-revprops'] 
 223     url 
= str(svn_url_or_wc
) 
 224     args 
+= ['-r', '%s:%s' % (rev_start
, rev_end
)] 
 225     if not "@" in svn_url_or_wc
: 
 226         url 
= "%s@%s" % (svn_url_or_wc
, str(max(rev_start
, rev_end
))) 
 227     args 
+= ['--limit', str(limit
), url
] 
 228     xml_string 
= run_svn(args
) 
 229     return parse_svn_log_xml(xml_string
) 
 231 def get_svn_status(svn_wc
, quiet
=False, no_recursive
=False): 
 233     Get SVN status information about the given working copy. 
 235     # Ensure proper stripping by canonicalizing the path 
 236     svn_wc 
= os
.path
.abspath(svn_wc
) 
 237     args 
= ['status', '--xml', '--ignore-externals'] 
 244     xml_string 
= run_svn(args 
+ [svn_wc
]) 
 245     return parse_svn_status_xml(xml_string
, svn_wc
, ignore_externals
=True) 
 247 def get_svn_versioned_files(svn_wc
): 
 249     Get the list of versioned files in the SVN working copy. 
 252     for e 
in get_svn_status(svn_wc
): 
 253         if e
['path'] and e
['type'] == 'normal': 
 254             contents
.append(e
['path']) 
 257 def get_one_svn_log_entry(svn_url
, rev_start
, rev_end
, stop_on_copy
=False, get_changed_paths
=True, get_revprops
=False): 
 259     Get the first SVN log entry in the requested revision range. 
 261     entries 
= run_svn_log(svn_url
, rev_start
, rev_end
, 1, stop_on_copy
, get_changed_paths
, get_revprops
) 
 264     raise EmptySVNLog("No SVN log for %s between revisions %s and %s" % 
 265         (svn_url
, rev_start
, rev_end
)) 
 267 def get_first_svn_log_entry(svn_url
, rev_start
, rev_end
, stop_on_copy
=True, get_changed_paths
=True): 
 269     Get the first log entry after (or at) the given revision number in an SVN branch. 
 270     By default the revision number is set to 0, which will give you the log 
 271     entry corresponding to the branch creaction. 
 273     NOTE: to know whether the branch creation corresponds to an SVN import or 
 274     a copy from another branch, inspect elements of the 'changed_paths' entry 
 275     in the returned dictionary. 
 277     return get_one_svn_log_entry(svn_url
, rev_start
, rev_end
, stop_on_copy
=stop_on_copy
, get_changed_paths
=get_changed_paths
) 
 279 def get_last_svn_log_entry(svn_url
, rev_start
, rev_end
, stop_on_copy
=True, get_changed_paths
=True): 
 281     Get the last log entry before/at the given revision number in an SVN branch. 
 282     By default the revision number is set to HEAD, which will give you the log 
 283     entry corresponding to the latest commit in branch. 
 285     return get_one_svn_log_entry(svn_url
, rev_end
, rev_start
, stop_on_copy
=stop_on_copy
, get_changed_paths
=get_changed_paths
) 
 288 log_duration_threshold 
= 10.0 
 289 log_min_chunk_length 
= 10 
 290 log_max_chunk_length 
= 10000 
 292 def iter_svn_log_entries(svn_url
, first_rev
, last_rev
, stop_on_copy
=False, get_changed_paths
=True, get_revprops
=False): 
 294     Iterate over SVN log entries between first_rev and last_rev. 
 296     This function features chunked log fetching so that it isn't too nasty 
 297     to the SVN server if many entries are requested. 
 299     NOTE: This chunked log fetching *ONLY* works correctly on paths which 
 300     are known to have existed unbroken in the SVN repository, e.g. /trunk. 
 301     Chunked fetching breaks down if a path existed in earlier, then was 
 302     deleted, and later was re-created. For example, if path was created in r5, 
 303     then deleted in r1000, and then later re-created in r5000... 
 304       svn log --stop-on-copy --limit 1 -r 1:50 "path/to/file" 
 305         --> would yield r5, i.e. the _initial_ creation 
 306       svn log --stop-on-copy --limit 1 -r 1:HEAD "path/to/file" 
 307         --> would yield r5000, i.e. the _re-creation_ 
 308     In theory this might work if we always search "backwards", searching from 
 309     the end going forward rather than forward going to the end... 
 311     if last_rev 
== "HEAD": 
 312         info 
= get_svn_info(svn_url
) 
 313         last_rev 
= info
['revision'] 
 315     chunk_length 
= log_min_chunk_length
 
 316     while cur_rev 
<= last_rev
: 
 317         start_t 
= time
.time() 
 318         stop_rev 
= min(last_rev
, cur_rev 
+ chunk_length
) 
 319         entries 
= run_svn_log(svn_url
, cur_rev
, stop_rev
, chunk_length
, 
 320                               stop_on_copy
, get_changed_paths
, get_revprops
) 
 321         duration 
= time
.time() - start_t
 
 324                 if e
['revision'] > last_rev
: 
 327             if e
['revision'] >= last_rev
: 
 329             cur_rev 
= e
['revision']+1 
 331             cur_rev 
= int(stop_rev
)+1 
 332         # Adapt chunk length based on measured request duration 
 333         if duration 
< log_duration_threshold
: 
 334             chunk_length 
= min(log_max_chunk_length
, int(chunk_length 
* 2.0)) 
 335         elif duration 
> log_duration_threshold 
* 2: 
 336             chunk_length 
= max(log_min_chunk_length
, int(chunk_length 
/ 2.0)) 
 339 _svn_client_version 
= None 
 341 def get_svn_client_version(): 
 343     Returns the SVN client version as a tuple. 
 345     The returned tuple only contains numbers, non-digits in version string are 
 348     global _svn_client_version
 
 349     if _svn_client_version 
is None: 
 350         raw 
= run_svn(['--version', '-q']).strip() 
 351         _svn_client_version 
= tuple(map(int, [x 
for x 
in raw
.split('.') 
 353     return _svn_client_version
 
 356 def parse_svn_propget_xml(xml_string
): 
 358     Parse the XML output from an "svn propget" command and extract useful 
 359     information as a dict. 
 362     xml_string 
= strip_forbidden_xml_chars(xml_string
) 
 363     tree 
= ET
.fromstring(xml_string
) 
 364     prop 
= tree
.find('.//property') 
 365     d
['name'] = prop
.get('name') 
 366     d
['value'] = prop 
is not None and prop
.text 
and prop
.text
.replace('\r\n', '\n').replace('\n\r', '\n').replace('\r', '\n') or "" 
 369 def parse_svn_proplist_xml(xml_string
): 
 371     Parse the XML output from an "svn proplist" command and extract list 
 375     xml_string 
= strip_forbidden_xml_chars(xml_string
) 
 376     tree 
= ET
.fromstring(xml_string
) 
 377     for prop 
in tree
.findall('.//property'): 
 378         l
.append(prop
.get('name')) 
 381 def get_prop_value(svn_url_or_wc
, prop_name
, rev_number
=None): 
 383     Get the value of a versioned property for the given path. 
 385     args 
= ['propget', '--xml'] 
 386     url 
= str(svn_url_or_wc
) 
 388         args 
+= ['-r', rev_number
] 
 389         if not "@" in svn_url_or_wc
: 
 390             url 
= "%s@%s" % (svn_url_or_wc
, str(rev_number
)) 
 391     args 
+= [prop_name
, url
] 
 392     xml_string 
= run_svn(args
) 
 393     return parse_svn_propget_xml(xml_string
) 
 395 def get_all_props(svn_url_or_wc
, rev_number
=None): 
 397     Get the values of all versioned properties for the given path. 
 400     args 
= ['proplist', '--xml'] 
 401     url 
= str(svn_url_or_wc
) 
 403         args 
+= ['-r', rev_number
] 
 404         if not "@" in svn_url_or_wc
: 
 405             url 
= "%s@%s" % (svn_url_or_wc
, str(rev_number
)) 
 407     xml_string 
= run_svn(args
) 
 408     props 
= parse_svn_proplist_xml(xml_string
) 
 409     for prop_name 
in props
: 
 410         d 
= get_prop_value(svn_url_or_wc
, prop_name
, rev_number
) 
 411         l
[d
['name']] = d
['value']