Replicate (replay) changesets from one SVN repository to another:
* Maintains full logical history (e.g. uses "svn copy" for renames).
* Maintains original commit messages.
* Cannot maintain original commit date, but appends original commit date
  for each commit message: "Date: %d".
* Optionally maintain source author info. (Only supported if accessing
  target SVN repo via file://)
* Optionally run an external shell script before each replayed commit
  to give the ability to dynamically exclude or modify files as part
  of the replay.

License: GPLv2, the same as hgsvn.
Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
(This is a forked and modified version of http://code.google.com/p/svn2svn/)
  30 from optparse 
import OptionParser
 
  31 from subprocess 
import Popen
, PIPE
 
  32 from datetime 
import datetime
 
  35     from xml
.etree 
import cElementTree 
as ET
 
  38         from xml
.etree 
import ElementTree 
as ET
 
  41             import cElementTree 
as ET
 
  43             from elementtree 
import ElementTree 
as ET
 
  45 svn_log_args 
= ['log', '--xml'] 
  46 svn_info_args 
= ['info', '--xml'] 
  47 svn_checkout_args 
= ['checkout', '-q'] 
  48 svn_status_args 
= ['status', '--xml', '-v', '--ignore-externals'] 
  52 debug_runsvn_timing 
= False    # Display how long each "svn" OS command took to run? 
  53 # Setup verbosity options 
  54 runsvn_showcmd 
= False    # Display every "svn" OS command we run? 
  55 runsvn_showout 
= False    # Display the stdout results from every  "svn" OS command we run? 
  56 svnlog_verbose 
= True     # Display each action + changed-path as we walk the history? 
  58 # define exception class 
  59 class ExternalCommandFailed(RuntimeError): 
  61     An external command failed. 
  64 def display_error(message
, raise_exception 
= True): 
  66     Display error message, then terminate. 
  68     print "Error:", message
 
  71         raise ExternalCommandFailed
 
  75 # Windows compatibility code by Bill Baxter 
  77     def find_program(name
): 
  79         Find the name of the program for Popen. 
  80         Windows is finicky about having the complete file name. Popen 
  81         won't search the %PATH% for you automatically. 
  82         (Adapted from ctypes.find_library) 
  84         # See MSDN for the REAL search order. 
  85         base
, ext 
= os
.path
.splitext(name
) 
  89             exts 
= ['.bat', '.exe'] 
  90         for directory 
in os
.environ
['PATH'].split(os
.pathsep
): 
  92                 fname 
= os
.path
.join(directory
, base 
+ e
) 
  93                 if os
.path
.exists(fname
): 
  97     def find_program(name
): 
  99         Find the name of the program for Popen. 
 100         On Unix, popen isn't picky about having absolute paths. 
 109     return q 
+ s
.replace('\\', '\\\\').replace("'", "'\"'\"'") + q
 
 111 locale_encoding 
= locale
.getpreferredencoding() 
 113 def run_svn(args
, fail_if_stderr
=False, encoding
="utf-8"): 
 116     exit if svn cmd failed 
 118     def _transform_arg(a
): 
 119         if isinstance(a
, unicode): 
 120             a 
= a
.encode(encoding 
or locale_encoding
) 
 121         elif not isinstance(a
, str): 
 124     t_args 
= map(_transform_arg
, args
) 
 126     cmd 
= find_program("svn") 
 127     cmd_string 
= str(" ".join(map(shell_quote
, [cmd
] + t_args
))) 
 129         print "$", "("+os
.getcwd()+")", cmd_string
 
 130     if debug_runsvn_timing
: 
 132     pipe 
= Popen([cmd
] + t_args
, executable
=cmd
, stdout
=PIPE
, stderr
=PIPE
) 
 133     out
, err 
= pipe
.communicate() 
 134     if debug_runsvn_timing
: 
 136         print "(" + str(round(time2
-time1
,4)) + " elapsed)" 
 137     if out 
and runsvn_showout
: 
 139     if pipe
.returncode 
!= 0 or (fail_if_stderr 
and err
.strip()): 
 140         display_error("External program failed (return code %d): %s\n%s" 
 141             % (pipe
.returncode
, cmd_string
, err
)) 
 144 def svn_date_to_timestamp(svn_date
): 
 146     Parse an SVN date as read from the XML output and 
 147     return the corresponding timestamp. 
 149     # Strip microseconds and timezone (always UTC, hopefully) 
 150     # XXX there are various ISO datetime parsing routines out there, 
 151     # cf. http://seehuhn.de/comp/pdate 
 152     date 
= svn_date
.split('.', 2)[0] 
 153     time_tuple 
= time
.strptime(date
, "%Y-%m-%dT%H:%M:%S") 
 154     return calendar
.timegm(time_tuple
) 
 156 def parse_svn_info_xml(xml_string
): 
 158     Parse the XML output from an "svn info" command and extract 
 159     useful information as a dict. 
 162     tree 
= ET
.fromstring(xml_string
) 
 163     entry 
= tree
.find('.//entry') 
 165         d
['url'] = entry
.find('url').text
 
 166         d
['revision'] = int(entry
.get('revision')) 
 167         d
['repos_url'] = tree
.find('.//repository/root').text
 
 168         d
['last_changed_rev'] = int(tree
.find('.//commit').get('revision')) 
 169         d
['kind'] = entry
.get('kind') 
 172 def parse_svn_log_xml(xml_string
): 
 174     Parse the XML output from an "svn log" command and extract 
 175     useful information as a list of dicts (one per log changeset). 
 178     tree 
= ET
.fromstring(xml_string
) 
 179     for entry 
in tree
.findall('logentry'): 
 181         d
['revision'] = int(entry
.get('revision')) 
 182         # Some revisions don't have authors, most notably 
 183         # the first revision in a repository. 
 184         author 
= entry
.find('author') 
 185         d
['author'] = author 
is not None and author
.text 
or None 
 186         d
['date'] = svn_date_to_timestamp(entry
.find('date').text
) 
 187         # Some revisions may have empty commit message 
 188         message 
= entry
.find('msg') 
 189         message 
= message 
is not None and message
.text 
is not None \
 
 190                         and message
.text
.strip() or "" 
 191         # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n' 
 192         d
['message'] = message
.replace('\r\n', '\n').replace('\n\r', '\n'). \
 
 194         paths 
= d
['changed_paths'] = [] 
 195         for path 
in entry
.findall('.//path'): 
 196             copyfrom_rev 
= path
.get('copyfrom-rev') 
 198                 copyfrom_rev 
= int(copyfrom_rev
) 
 201                 'kind': path
.get('kind'), 
 202                 'action': path
.get('action'), 
 203                 'copyfrom_path': path
.get('copyfrom-path'), 
 204                 'copyfrom_revision': copyfrom_rev
, 
 206         # Need to sort paths (i.e. into hierarchical order), so that process_svn_log_entry() 
 207         # can process actions in depth-first order. 
 212 def parse_svn_status_xml(xml_string
, base_dir
=None): 
 214     Parse the XML output from an "svn status" command and extract 
 215     useful info as a list of dicts (one per status entry). 
 218     tree 
= ET
.fromstring(xml_string
) 
 219     for entry 
in tree
.findall('.//entry'): 
 221         path 
= entry
.get('path') 
 222         if base_dir 
is not None: 
 223             assert path
.startswith(base_dir
) 
 224             path 
= path
[len(base_dir
):].lstrip('/\\') 
 226         wc_status 
= entry
.find('wc-status') 
 227         if wc_status
.get('item') == 'external': 
 228             d
['type'] = 'external' 
 229         elif wc_status
.get('revision') is not None: 
 232             d
['type'] = 'unversioned' 
 236 def get_svn_info(svn_url_or_wc
, rev_number
=None): 
 238     Get SVN information for the given URL or working copy, 
 239     with an optionally specified revision number. 
 240     Returns a dict as created by parse_svn_info_xml(). 
 242     if rev_number 
is not None: 
 243         args 
= [svn_url_or_wc 
+ "@" + str(rev_number
)] 
 245         args 
= [svn_url_or_wc
] 
 246     xml_string 
= run_svn(svn_info_args 
+ args
, fail_if_stderr
=True) 
 247     return parse_svn_info_xml(xml_string
) 
 249 def svn_checkout(svn_url
, checkout_dir
, rev_number
=None): 
 251     Checkout the given URL at an optional revision number. 
 254     if rev_number 
is not None: 
 255         args 
+= ['-r', rev_number
] 
 256     args 
+= [svn_url
, checkout_dir
] 
 257     return run_svn(svn_checkout_args 
+ args
) 
 259 def run_svn_log(svn_url_or_wc
, rev_start
, rev_end
, limit
, stop_on_copy
=False, get_changed_paths
=True): 
 261     Fetch up to 'limit' SVN log entries between the given revisions. 
 264         args 
= ['--stop-on-copy'] 
 267     url 
= str(svn_url_or_wc
) 
 268     if rev_start 
!= 'HEAD' and rev_end 
!= 'HEAD': 
 269         args 
+= ['-r', '%s:%s' % (rev_start
, rev_end
)] 
 270         if not "@" in svn_url_or_wc
: 
 271             url 
+= "@" + str(max(rev_start
, rev_end
)) 
 272     if get_changed_paths
: 
 274     args 
+= ['--limit', str(limit
), url
] 
 275     xml_string 
= run_svn(svn_log_args 
+ args
) 
 276     return parse_svn_log_xml(xml_string
) 
 278 def get_svn_status(svn_wc
, flags
=None): 
 280     Get SVN status information about the given working copy. 
 282     # Ensure proper stripping by canonicalizing the path 
 283     svn_wc 
= os
.path
.abspath(svn_wc
) 
 288     xml_string 
= run_svn(svn_status_args 
+ args
) 
 289     return parse_svn_status_xml(xml_string
, svn_wc
) 
 291 def get_one_svn_log_entry(svn_url
, rev_start
, rev_end
, stop_on_copy
=False, get_changed_paths
=True): 
 293     Get the first SVN log entry in the requested revision range. 
 295     entries 
= run_svn_log(svn_url
, rev_start
, rev_end
, 1, stop_on_copy
, get_changed_paths
) 
 297         display_error("No SVN log for %s between revisions %s and %s" % 
 298                       (svn_url
, rev_start
, rev_end
)) 
 302 def get_first_svn_log_entry(svn_url
, rev_start
, rev_end
, get_changed_paths
=True): 
 304     Get the first log entry after/at the given revision number in an SVN branch. 
 305     By default the revision number is set to 0, which will give you the log 
 306     entry corresponding to the branch creaction. 
 308     NOTE: to know whether the branch creation corresponds to an SVN import or 
 309     a copy from another branch, inspect elements of the 'changed_paths' entry 
 310     in the returned dictionary. 
 312     return get_one_svn_log_entry(svn_url
, rev_start
, rev_end
, stop_on_copy
=True, get_changed_paths
=True) 
 314 def get_last_svn_log_entry(svn_url
, rev_start
, rev_end
, get_changed_paths
=True): 
 316     Get the last log entry before/at the given revision number in an SVN branch. 
 317     By default the revision number is set to HEAD, which will give you the log 
 318     entry corresponding to the latest commit in branch. 
 320     return get_one_svn_log_entry(svn_url
, rev_end
, rev_start
, stop_on_copy
=True, get_changed_paths
=True) 
 323 log_duration_threshold 
= 10.0 
 324 log_min_chunk_length 
= 10 
 326 def iter_svn_log_entries(svn_url
, first_rev
, last_rev
): 
 328     Iterate over SVN log entries between first_rev and last_rev. 
 330     This function features chunked log fetching so that it isn't too nasty 
 331     to the SVN server if many entries are requested. 
 334     chunk_length 
= log_min_chunk_length
 
 335     chunk_interval_factor 
= 1.0 
 336     while last_rev 
== "HEAD" or cur_rev 
<= last_rev
: 
 337         start_t 
= time
.time() 
 338         stop_rev 
= min(last_rev
, cur_rev 
+ int(chunk_length 
* chunk_interval_factor
)) 
 339         entries 
= run_svn_log(svn_url
, cur_rev
, stop_rev
, chunk_length
) 
 340         duration 
= time
.time() - start_t
 
 342             if stop_rev 
== last_rev
: 
 344             cur_rev 
= stop_rev 
+ 1 
 345             chunk_interval_factor 
*= 2.0 
 349         cur_rev 
= e
['revision'] + 1 
 350         # Adapt chunk length based on measured request duration 
 351         if duration 
< log_duration_threshold
: 
 352             chunk_length 
= int(chunk_length 
* 2.0) 
 353         elif duration 
> log_duration_threshold 
* 2: 
 354             chunk_length 
= max(log_min_chunk_length
, int(chunk_length 
/ 2.0)) 
 356 def commit_from_svn_log_entry(entry
, files
=None, keep_author
=False): 
 358     Given an SVN log entry and an optional sequence of files, do an svn commit. 
 360     # TODO: Run optional external shell hook here, for doing pre-commit filtering 
 361     # This will use the local timezone for displaying commit times 
 362     timestamp 
= int(entry
['date']) 
 363     svn_date 
= str(datetime
.fromtimestamp(timestamp
)) 
 364     # Uncomment this one if you prefer UTC commit times 
 365     #svn_date = "%d 0" % timestamp 
 367         options 
= ["ci", "--force-log", "-m", entry
['message'] + "\nDate: " + svn_date
, "--username", entry
['author']] 
 369         options 
= ["ci", "--force-log", "-m", entry
['message'] + "\nDate: " + svn_date 
+ "\nAuthor: " + entry
['author']] 
 371         options 
+= list(files
) 
 376     Check if a given file/folder is being tracked by Subversion. 
 377     Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories. 
 378     With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of the working-copy. 
 379     Use "svn status" to check the status of the file/folder. 
 381     # TODO: Is there a better way to do this? 
 382     entries 
= get_svn_status(p
) 
 386     return (d
['type'] == 'normal') 
 388 def find_svn_ancestors(source_repos_url
, source_base
, source_offset
, copyfrom_path
, copyfrom_rev
): 
 390     Given a copy-from path (copyfrom_path), walk the SVN history backwards to inspect 
 391     the ancestry of that path. Build a collection of copyfrom_path+revision pairs 
 392     for each of the branch-copies since the initial branch-creation.  If we find a 
 393     copyfrom_path which source_base is a substring match of (e.g. we crawled back to 
 394     the initial branch-copy from trunk), then return the collection of ancestor paths. 
 395     Otherwise, copyfrom_path has no ancestry compared to source_base. 
 397     This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a 
 398     file/folder was renamed in a branch and then that branch was merged back to trunk. 
 401     * source_repos_url = Full URL to root of repository, e.g. 'file:///path/to/repos' 
 402     * source_base = e.g. '/trunk' 
 403     * source_offset = e.g. 'projectA/file1.txt' 
 404     * copyfrom_path = e.g. '/branches/bug123/projectA/file1.txt' 
 408     working_path 
= copyfrom_path
 
 409     working_base 
= copyfrom_path
[:-len(source_offset
)].rstrip('/') 
 410     working_offset 
= source_offset
.strip('/') 
 411     working_rev 
= copyfrom_rev
 
 412     ancestors 
= [{'path': [working_base, working_offset], 'revision': working_rev}
] 
 414         # Get the first "svn log" entry for this path (relative to @rev) 
 415         #working_path = working_base + "/" + working_offset 
 417             print ">> find_svn_ancestors: " + source_repos_url 
+ working_path 
+ "@" + str(working_rev
) + \
 
 418                    "  (" + working_base 
+ " " + working_offset 
+ ")" 
 419         log_entry 
= get_first_svn_log_entry(source_repos_url 
+ working_path 
+ "@" + str(working_rev
), 1, str(working_rev
), True) 
 422         # Find the action for our working_path in this revision 
 423         for d 
in log_entry
['changed_paths']: 
 425             if not path 
in working_path
: 
 427             # Check action-type for this file 
 429             if action 
not in 'MARD': 
 430                 display_error("In SVN rev. %d: action '%s' not supported. \ 
 431                                Please report a bug!" % (log_entry
['revision'], action
)) 
 433                 debug_desc 
= ": " + action 
+ " " + path
 
 434                 if d
['copyfrom_path']: 
 435                     debug_desc 
+= " (from " + d
['copyfrom_path'] + "@" + str(d
['copyfrom_revision']) + ")" 
 439                 # If file/folder was replaced, it has no ancestor 
 442                 # If file/folder was deleted, it has no ancestor 
 445                 # If file/folder was added but not a copy, it has no ancestor 
 446                 if not d
['copyfrom_path']: 
 448                 # Else, file/folder was added and is a copy, so check ancestors 
 449                 path_old 
= d
['copyfrom_path'] 
 450                 working_path 
= working_path
.replace(path
, path_old
) 
 451                 if working_base 
in working_path
: 
 452                     # If the new and old working_path share the same working_base, just need to update working_offset. 
 453                     working_offset 
= working_path
[len(working_base
)+1:] 
 455                     # Else, assume that working_base has changed but working_offset is the same, e.g. a re-branch. 
 456                     # TODO: Is this a safe assumption?! 
 457                     working_base 
= working_path
[:-len(working_offset
)].rstrip('/') 
 458                 working_rev 
= d
['copyfrom_revision'] 
 460                     print ">> find_svn_ancestors: copy-from: " + working_base 
+ " " + working_offset 
+ "@" + str(working_rev
) 
 461                 ancestors
.append({'path': [working_base, working_offset], 'revision': working_rev}
) 
 462                 # If we found a copy-from case which matches our source_base, we're done 
 463                 if (path_old 
== source_base
) or (path_old
.startswith(source_base 
+ "/")): 
 465                 # Else, follow the copy and keep on searching 
 469 def replay_svn_ancestors(ancestors
, source_repos_url
, source_url
, target_url
): 
 471     Given an array of ancestor info (find_svn_ancestors), replay the history 
 472     to correctly track renames ("svn copy/move") across branch-merges. 
 474     For example, consider a sequence of events like this: 
 475     1. svn copy /trunk /branches/fix1 
 476     2. (Make some changes on /branches/fix1) 
 477     3. svn copy /branches/fix1/Proj1 /branches/fix1/Proj2  " Rename folder 
 478     4. svn copy /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt  " Rename file inside renamed folder 
 479     5. svn co /trunk && svn merge /branches/fix1 
 480     After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1 
 481     and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just 
 482     to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder, 
 483     we'd lose the logical history that Proj2/file2.txt is really a descendant 
 486     'source_repos_url' is the full URL to the root of the source repository. 
 487     'ancestors' is the array returned by find_svn_ancestors() with the final 
 488       destination info appended to it by process_svn_log_entry(). 
 491     # Ignore ancestors[0], which is the original (pre-branch-copy) trunk path 
 492     # Ignore ancestors[1], which is the original branch-creation commit 
 493     # Ignore ancestors[n], which is the final commit back to trunk 
 494     for idx 
in range(1, len(ancestors
)-1): 
 495         ancestor 
= ancestors
[idx
] 
 496         source_base 
= ancestor
['path'][0] 
 497         source_offset 
= ancestor
['path'][1] 
 498         source_path 
= source_base 
+ "/" + source_offset
 
 499         source_rev 
= ancestor
['revision'] 
 500         source_rev_next 
= ancestors
[idx
+1]['revision'] 
 501         # Do a "svn log" on the _parent_ directory of source_path, since trying to get log info 
 502         # for the "old path" on the revision where the copy/move happened will fail. 
 503         if "/" in source_path
: 
 504             p_source_path 
= source_path
[:source_path
.rindex('/')] 
 508             print ">> replay_svn_ancestors: ["+str(idx
)+"]" + source_path
+"@"+str(source_rev
) + "  ["+p_source_path
+"@"+str(source_rev
)+":"+str(source_rev_next
-1)+"]" 
 509         it_log_entries 
= iter_svn_log_entries(source_repos_url
+p_source_path
, source_rev
, source_rev_next
-1) 
 510         for log_entry 
in it_log_entries
: 
 511             #print ">> replay_svn_ancestors: log_entry: (" + source_repos_url+source_base + ")" 
 513             process_svn_log_entry(log_entry
, source_repos_url
, source_repos_url
+source_base
, target_url
) 
 515 def process_svn_log_entry(log_entry
, source_repos_url
, source_url
, target_url
, source_offset
=""): 
 517     Process SVN changes from the given log entry. 
 518     Returns array of all the paths in the working-copy that were changed, 
 519     i.e. the paths which need to be "svn commit". 
 521     'log_entry' is the array structure built by parse_svn_log_xml(). 
 522     'source_repos_url' is the full URL to the root of the source repository. 
 523     'source_url' is the full URL to the source path in the source repository. 
 524     'target_url' is the full URL to the target path in the target repository. 
 526     # Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123' 
 527     source_base 
= source_url
[len(source_repos_url
):] 
 529         print ">> process_svn_log_entry: " + source_url 
+ " (" + source_base 
+ ")" 
 531     svn_rev 
= log_entry
['revision'] 
 538     for d 
in log_entry
['changed_paths']: 
 540             msg 
= " " + d
['action'] + " " + d
['path'] 
 541             if d
['copyfrom_path']: 
 542                 msg 
+= " (from " + d
['copyfrom_path'] + "@" + str(d
['copyfrom_revision']) + ")" 
 544         # Get the full path for this changed_path 
 545         # e.g. u'/branches/bug123/projectA/file1.txt' 
 547         if not path
.startswith(source_base 
+ "/"): 
 548             # Ignore changed files that are not part of this subdir 
 549             if path 
!= source_base
: 
 550                 print ">> process_svn_log_entry: Unrelated path: " + path 
+ "  (" + source_base 
+ ")" 
 551                 unrelated_paths
.append(path
) 
 553         # Calculate the offset (based on source_base) for this changed_path 
 554         # e.g. u'projectA/file1.txt' 
 555         # (path = source_base + "/" + path_offset) 
 556         path_offset 
= path
[len(source_base
):].strip("/") 
 557         # Get the action for this path 
 559         if action 
not in 'MARD': 
 560             display_error("In SVN rev. %d: action '%s' not supported. \ 
 561                            Please report a bug!" % (svn_rev
, action
)) 
 563         # Try to be efficient and keep track of an explicit list of paths in the 
 564         # working copy that changed. If we commit from the root of the working copy, 
 565         # then SVN needs to crawl the entire working copy looking for pending changes. 
 566         # But, if we gather too many paths to commit, then we wipe commit_paths below 
 567         # and end-up doing a commit at the root of the working-copy. 
 568         if len (commit_paths
) < 100: 
 569             commit_paths
.append(path_offset
) 
 571         # Special-handling for replace's 
 574             # If file was "replaced" (deleted then re-added, all in same revision), 
 575             # then we need to run the "svn rm" first, then change action='A'. This 
 576             # lets the normal code below handle re-"svn add"'ing the files. This 
 577             # should replicate the "replace". 
 578             run_svn(["up", path_offset
]) 
 579             run_svn(["remove", "--force", path_offset
]) 
 583         # Handle all the various action-types 
 584         # (Handle "add" first, for "svn copy/move" support) 
 586             # Determine where to export from 
 587             copyfrom_rev 
= svn_rev
 
 590             # Handle cases where this "add" was a copy from another URL in the source repos 
 591             if d
['copyfrom_revision']: 
 592                 copyfrom_rev 
= d
['copyfrom_revision'] 
 593                 copyfrom_path 
= d
['copyfrom_path'] 
 595                     print ">> process_svn_log_entry: copy-to: " + source_base 
+ " " + source_offset 
+ " " + path_offset
 
 596                 if source_base 
in copyfrom_path
: 
 597                     # If the copy-from path is inside the current working-copy, no need to check ancestry. 
 599                     copyfrom_path 
= copyfrom_path
[len(source_base
):].strip("/") 
 601                         print ">> process_svn_log_entry: Found copy: " + copyfrom_path
+"@"+str(copyfrom_rev
) 
 604                     ancestors 
= find_svn_ancestors(source_repos_url
, source_base
, path_offset
, 
 605                                                    copyfrom_path
, copyfrom_rev
) 
 607                     # Reverse the list, so that we loop in chronological order 
 609                     # Append the current revision 
 610                     ancestors
.append({'path': [source_base, path_offset], 'revision': svn_rev}
) 
 611                     # ancestors[0] is the original (pre-branch-copy) trunk path. 
 612                     # ancestors[1] is the first commit on the new branch. 
 613                     copyfrom_rev 
=  ancestors
[0]['revision'] 
 614                     copyfrom_base 
= ancestors
[0]['path'][0] 
 615                     copyfrom_offset 
= ancestors
[0]['path'][1] 
 616                     copyfrom_path 
= copyfrom_base 
+ copyfrom_offset
 
 618                         print ">> process_svn_log_entry: FOUND PARENT:" 
 619                         for idx 
in range(0,len(ancestors
)): 
 620                             ancestor 
= ancestors
[idx
] 
 621                             print "     ["+str(idx
)+"] " + ancestor
['path'][0]+" "+ancestor
['path'][1]+"@"+str(ancestor
['revision']) 
 622                     #print ">> process_svn_log_entry: copyfrom_path (before): " + copyfrom_path + " source_base: " + source_base + " p: " + p 
 623                     copyfrom_path 
= copyfrom_path
[len(source_base
):].strip("/") 
 624                     #print ">> process_svn_log_entry: copyfrom_path (after): " + copyfrom_path 
 626             # If this add was a copy-from, do a smart replay of the ancestors' history. 
 627             # Else just copy/export the files from the source repo and "svn add" them. 
 630                     print ">> process_svn_log_entry: svn_copy: copy-from: " + copyfrom_path
+"@"+str(copyfrom_rev
) + "  source_base: "+source_base 
+ "  len(ancestors): " + str(len(ancestors
)) 
 631                 # If we don't have any ancestors, then this is just a straight "svn copy" in the current working-copy. 
 633                     # ...but not if the target is already tracked, because this might run several times for the same path. 
 634                     # TODO: Is there a better way to avoid recursion bugs? Maybe a collection of processed paths? 
 635                     if not in_svn(path_offset
): 
 636                         run_svn(["copy", copyfrom_path
, path_offset
]) 
 638                     # Replay any actions which happened to this folder from the ancestor path(s). 
 639                     replay_svn_ancestors(ancestors
, source_repos_url
, source_url
, target_url
) 
 641                 # Create (parent) directory if needed 
 642                 if d
['kind'] == 'dir': 
 645                     p_path 
= os
.path
.dirname(path_offset
).strip() or '.' 
 646                 if not os
.path
.exists(p_path
): 
 648                 # Export the entire added tree. 
 649                 run_svn(["export", "--force", "-r", str(copyfrom_rev
), 
 650                          source_repos_url 
+ copyfrom_path 
+ "@" + str(copyfrom_rev
), path_offset
]) 
 651                 # TODO: The "no in_svn" condition here is wrong for replace cases. 
 652                 #       Added the in_svn condition here originally since "svn export" is recursive 
 653                 #       but "svn log" will have an entry for each indiv file, hence we run into a 
 654                 #       cannot-re-add-file-which-is-already-added issue. 
 655                 if (not in_svn(path_offset
)) or (is_replace
): 
 656                     run_svn(["add", "--parents", path_offset
]) 
 657                 # TODO: Need to copy SVN properties from source repos 
 660             # Queue "svn remove" commands, to allow the action == 'A' handling the opportunity 
 661             # to do smart "svn copy" handling on copy/move/renames. 
 662             removed_paths
.append(path_offset
) 
 666             display_error("Internal Error: Handling for action='R' not implemented yet.") 
 669             modified_paths
.append(path_offset
) 
 672             display_error("Internal Error: pull_svn_rev: Unhandled 'action' value: '" + action 
+ "'") 
 675         for r 
in removed_paths
: 
 676             # TODO: Is the "svn up" here needed? 
 678             run_svn(["remove", "--force", r
]) 
 681         for m 
in modified_paths
: 
 682             # TODO: Is the "svn up" here needed? 
 684             m_url 
= source_url 
+ "/" + m
 
 685             out 
= run_svn(["merge", "-c", str(svn_rev
), "--non-recursive", 
 686                      "--non-interactive", "--accept=theirs-full", 
 687                      m_url
+"@"+str(svn_rev
), m
]) 
 690         print "Unrelated paths: (vs. '" + source_base 
+ "')" 
 691         print "*", unrelated_paths
 
 695 def pull_svn_rev(log_entry
, source_repos_url
, source_url
, target_url
, keep_author
=False): 
 697     Pull SVN changes from the given log entry. 
 698     Returns the new SVN revision. 
 699     If an exception occurs, it will rollback to revision 'svn_rev - 1'. 
 701     ## Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123' 
 702     #source_base = source_url[len(source_repos_url):] 
 704     svn_rev 
= log_entry
['revision'] 
 705     print "\n(Starting source rev #"+str(svn_rev
)+":)" 
 706     print "r"+str(log_entry
['revision']) + " | " + \
 
 707           log_entry
['author'] + " | " + \
 
 708           str(datetime
.fromtimestamp(int(log_entry
['date'])).isoformat(' ')) 
 709     print log_entry
['message'] 
 710     print "------------------------------------------------------------------------" 
 711     commit_paths 
= process_svn_log_entry(log_entry
, source_repos_url
, source_url
, target_url
) 
 713     # If we had too many individual paths to commit, wipe the list and just commit at 
 714     # the root of the working copy. 
 715     if len (commit_paths
) > 99: 
 718     # TODO: Use SVN properties to track source URL + rev in the target repo? 
 719     #       This would provide a more reliable resume-support 
 721         commit_from_svn_log_entry(log_entry
, commit_paths
, keep_author
=keep_author
) 
 722     except ExternalCommandFailed
: 
 723         # try to ignore the Properties conflicts on files and dirs 
 724         # use the copy from original_wc 
 725         # TODO: Need to re-work this? 
 726         #has_Conflict = False 
 727         #for d in log_entry['changed_paths']: 
 729         #    p = p[len(source_base):].strip("/") 
 730         #    if os.path.isfile(p): 
 731         #        if os.path.isfile(p + ".prej"): 
 732         #            has_Conflict = True 
 733         #            shutil.copy(original_wc + os.sep + p, p) 
 734         #            p2=os.sep + p.replace('_', '__').replace('/', '_') \ 
 735         #                      + ".prej-" + str(svn_rev) 
 736         #            shutil.move(p + ".prej", os.path.dirname(original_wc) + p2) 
 737         #            w="\n### Properties conflicts ignored:" 
 738         #            print "%s %s, in revision: %s\n" % (w, p, svn_rev) 
 739         #    elif os.path.isdir(p): 
 740         #        if os.path.isfile(p + os.sep + "dir_conflicts.prej"): 
 741         #            has_Conflict = True 
 742         #            p2=os.sep + p.replace('_', '__').replace('/', '_') \ 
 743         #                      + "_dir__conflicts.prej-" + str(svn_rev) 
 744         #            shutil.move(p + os.sep + "dir_conflicts.prej", 
 745         #                        os.path.dirname(original_wc) + p2) 
 746         #            w="\n### Properties conflicts ignored:" 
 747         #            print "%s %s, in revision: %s\n" % (w, p, svn_rev) 
 748         #            out = run_svn(["propget", "svn:ignore", 
 749         #                           original_wc + os.sep + p]) 
 751         #                run_svn(["propset", "svn:ignore", out.strip(), p]) 
 752         #            out = run_svn(["propget", "svn:externals",
 753         #                           original_wc + os.sep + p])
 755         #                run_svn(["propset", "svn:externals", out.strip(), p])
 758         #    commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author) 
 760             raise ExternalCommandFailed
 
 761     print "(Finished source rev #"+str(svn_rev
)+")" 
 765     usage 
= "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>" 
 766     parser 
= OptionParser(usage
) 
 767     parser
.add_option("-a", "--keep-author", action
="store_true", 
 768                       dest
="keep_author", help="Keep revision Author or not") 
 769     parser
.add_option("-c", "--continue-from-break", action
="store_true", 
 770                       dest
="cont_from_break", 
 771                       help="Continue from previous break") 
 772     parser
.add_option("-r", "--svn-rev", type="int", dest
="svn_rev", 
 773                       help="SVN revision to checkout from") 
 774     (options
, args
) = parser
.parse_args() 
 776         display_error("incorrect number of arguments\n\nTry: svn2svn.py --help", 
 779     source_url 
= args
.pop(0).rstrip("/") 
 780     target_url 
= args
.pop(0).rstrip("/") 
 781     if options
.keep_author
: 
 786     # Find the greatest_rev in the source repo 
 787     svn_info 
= get_svn_info(source_url
) 
 788     greatest_rev 
= svn_info
['revision'] 
 792     # if old working copy does not exist, disable continue mode 
 793     # TODO: Better continue support. Maybe include source repo's rev # in target commit info? 
 794     if not os
.path
.exists(dup_wc
): 
 795         options
.cont_from_break 
= False 
 797     if not options
.cont_from_break
: 
 798         # Warn if the target SVN URL already exists
 799         cmd 
= find_program("svn") 
 800         pipe 
= Popen([cmd
] + ["list"] + [target_url
], executable
=cmd
, 
 801                      stdout
=PIPE
, stderr
=PIPE
) 
 802         out
, err 
= pipe
.communicate() 
 803         if pipe
.returncode 
== 0: 
 804             print "Target SVN URL: %s existed!" % target_url
 
 807             print "Press 'Enter' to Continue, 'Ctrl + C' to Cancel..." 
 808             print "(Timeout in 5 seconds)" 
 809             rfds
, wfds
, efds 
= select
.select([sys
.stdin
], [], [], 5) 
 811         # Get log entry for the SVN revision we will check out 
 813             # If a revision was specified, get the log entry at or just before that revision
 814             svn_start_log 
= get_last_svn_log_entry(source_url
, 1, options
.svn_rev
, False) 
 816             # Otherwise, get log entry of branch creation 
 817             # TODO: This call is *very* expensive on a repo with lots of revisions. 
 818             #       Even though the call is passing --limit 1, it seems like that limit-filter 
 819             #       is happening after SVN has fetched the full log history. 
 820             svn_start_log 
= get_first_svn_log_entry(source_url
, 1, greatest_rev
, False) 
 822         # This is the revision we will start from for source_url 
 823         svn_rev 
= svn_start_log
['revision'] 
 825         # Check out a working copy of target_url 
 826         dup_wc 
= os
.path
.abspath(dup_wc
) 
 827         if os
.path
.exists(dup_wc
): 
 828             shutil
.rmtree(dup_wc
) 
 829         svn_checkout(target_url
, dup_wc
) 
 832         # For the initial commit to the target URL, export all the contents from 
 833         # the source URL at the start-revision. 
 834         paths 
= run_svn(["list", "-r", str(svn_rev
), source_url
+"@"+str(svn_rev
)]) 
 835         paths 
= paths
.strip("\n").split("\n") 
 840             # Directories have a trailing slash in the "svn list" output 
 842                 path
=path
.rstrip('/') 
 843                 if not os
.path
.exists(path
): 
 845             run_svn(["export", "--force", "-r" , str(svn_rev
), source_url
+"/"+path
+"@"+str(svn_rev
), path
]) 
 846             run_svn(["add", path
]) 
 847         commit_from_svn_log_entry(svn_start_log
, [], keep_author
) 
 849         dup_wc 
= os
.path
.abspath(dup_wc
) 
 853     svn_info 
= get_svn_info(source_url
) 
 854     # Get the base URL for the source repos, e.g. u'svn://svn.example.com/svn/repo' 
 855     source_repos_url 
= svn_info
['repos_url'] 
 857     if options
.cont_from_break
: 
 858         svn_rev 
= svn_info
['revision'] - 1 
 862     # Load SVN log starting from svn_rev + 1 
 863     it_log_entries 
= iter_svn_log_entries(source_url
, svn_rev 
+ 1, greatest_rev
) 
 866         for log_entry 
in it_log_entries
: 
 867             # Replay this revision from source_url into target_url 
 868             pull_svn_rev(log_entry
, source_repos_url
, source_url
, target_url
, keep_author
) 
 869             # Update our target working-copy, to ensure everything says it's at the new HEAD revision 
 870             run_svn(["up", dup_wc
]) 
 872     except KeyboardInterrupt: 
 873         print "\nStopped by user." 
 875         run_svn(["revert", "--recursive", "."]) 
 877         print "\nCommand failed with following error:\n" 
 878         traceback
.print_exc() 
 880         run_svn(["revert", "--recursive", "."]) 
 886 if __name__ 
== "__main__":