svn2svn.py

   1 #!/usr/bin/env python
   2 """
   3 svn2svn.py
   4
   5 Replicate (replay) changesets from one SVN repository to another:
   6 * Maintains full logical history (e.g. uses "svn copy" for renames).
   7 * Maintains original commit messages.
   8 * Cannot maintain original commit date, but appends original commit date
   9   for each commit message: "Date: %d".
  10 * Optionally maintain source author info. (Only supported if accessing
  11   target SVN repo via file://)
  12 * Optionally run an external shell script before each replayed commit
  13   to give the ability to dynamically exclude or modify files as part
  14   of the replay.
  15
  16 License: GPLv2, the same as hgsvn.
  17 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
  18 (This is a forked and modified version of http://code.google.com/p/svn2svn/)
  19 """
  20
  21 import os
  22 import sys
  23 import time
  24 import locale
  25 import shutil
  26 import select
  27 import calendar
  28 import traceback
  29
  30 from optparse import OptionParser
  31 from subprocess import Popen, PIPE
  32 from datetime import datetime
  33 from operator import itemgetter
  34
  35 try:
  36     from xml.etree import cElementTree as ET
  37 except ImportError:
  38     try:
  39         from xml.etree import ElementTree as ET
  40     except ImportError:
  41         try:
  42             import cElementTree as ET
  43         except ImportError:
  44             from elementtree import ElementTree as ET
  45
# Base argument lists for the "svn" subcommands used in this file. The helper
# functions append their own arguments to these before calling run_svn().
svn_log_args = ['log', '--xml']
svn_info_args = ['info', '--xml']
svn_checkout_args = ['checkout', '-q']
svn_status_args = ['status', '--xml', '-v', '--ignore-externals']

# Setup debug options
debug = False                  # Print extra ">> ..." trace lines while walking history?
debug_runsvn_timing = False    # Display how long each "svn" OS command took to run?
# Setup verbosity options
runsvn_showcmd = False    # Display every "svn" OS command we run?
runsvn_showout = False    # Display the stdout results from every  "svn" OS command we run?
svnlog_verbose = True     # Display each action + changed-path as we walk the history?
  58
  59 # define exception class
  60 class ExternalCommandFailed(RuntimeError):
  61     """
  62     An external command failed.
  63     """
  64
  65 def display_error(message, raise_exception = True):
  66     """
  67     Display error message, then terminate.
  68     """
  69     print "Error:", message
  70     print
  71     if raise_exception:
  72         raise ExternalCommandFailed
  73     else:
  74         sys.exit(1)
  75
  76 # Windows compatibility code by Bill Baxter
  77 if os.name == "nt":
  78     def find_program(name):
  79         """
  80         Find the name of the program for Popen.
  81         Windows is finnicky about having the complete file name. Popen
  82         won't search the %PATH% for you automatically.
  83         (Adapted from ctypes.find_library)
  84         """
  85         # See MSDN for the REAL search order.
  86         base, ext = os.path.splitext(name)
  87         if ext:
  88             exts = [ext]
  89         else:
  90             exts = ['.bat', '.exe']
  91         for directory in os.environ['PATH'].split(os.pathsep):
  92             for e in exts:
  93                 fname = os.path.join(directory, base + e)
  94                 if os.path.exists(fname):
  95                     return fname
  96         return None
  97 else:
  98     def find_program(name):
  99         """
 100         Find the name of the program for Popen.
 101         On Unix, popen isn't picky about having absolute paths.
 102         """
 103         return name
 104
 105 def shell_quote(s):
 106     if os.name == "nt":
 107         q = '"'
 108     else:
 109         q = "'"
 110     return q + s.replace('\\', '\\\\').replace("'", "'\"'\"'") + q
 111
# Preferred encoding reported by the locale module. run_svn() falls back to it
# for encoding unicode arguments when its 'encoding' parameter is empty.
locale_encoding = locale.getpreferredencoding()
 113
 114 def run_svn(args, fail_if_stderr=False, encoding="utf-8"):
 115     """
 116     Run svn cmd in PIPE
 117     exit if svn cmd failed
 118     """
 119     def _transform_arg(a):
 120         if isinstance(a, unicode):
 121             a = a.encode(encoding or locale_encoding)
 122         elif not isinstance(a, str):
 123             a = str(a)
 124         return a
 125     t_args = map(_transform_arg, args)
 126
 127     cmd = find_program("svn")
 128     cmd_string = str(" ".join(map(shell_quote, [cmd] + t_args)))
 129     if runsvn_showcmd:
 130         print "$", "("+os.getcwd()+")", cmd_string
 131     if debug_runsvn_timing:
 132         time1 = time.time()
 133     pipe = Popen([cmd] + t_args, executable=cmd, stdout=PIPE, stderr=PIPE)
 134     out, err = pipe.communicate()
 135     if debug_runsvn_timing:
 136         time2 = time.time()
 137         print "(" + str(round(time2-time1,4)) + " elapsed)"
 138     if out and runsvn_showout:
 139         print out
 140     if pipe.returncode != 0 or (fail_if_stderr and err.strip()):
 141         display_error("External program failed (return code %d): %s\n%s"
 142             % (pipe.returncode, cmd_string, err))
 143     return out
 144
 145 def svn_date_to_timestamp(svn_date):
 146     """
 147     Parse an SVN date as read from the XML output and
 148     return the corresponding timestamp.
 149     """
 150     # Strip microseconds and timezone (always UTC, hopefully)
 151     # XXX there are various ISO datetime parsing routines out there,
 152     # cf. http://seehuhn.de/comp/pdate
 153     date = svn_date.split('.', 2)[0]
 154     time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
 155     return calendar.timegm(time_tuple)
 156
 157 def parse_svn_info_xml(xml_string):
 158     """
 159     Parse the XML output from an "svn info" command and extract
 160     useful information as a dict.
 161     """
 162     d = {}
 163     tree = ET.fromstring(xml_string)
 164     entry = tree.find('.//entry')
 165     if entry:
 166         d['url'] = entry.find('url').text
 167         d['revision'] = int(entry.get('revision'))
 168         d['repos_url'] = tree.find('.//repository/root').text
 169         d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
 170         d['kind'] = entry.get('kind')
 171     return d
 172
 173 def parse_svn_log_xml(xml_string):
 174     """
 175     Parse the XML output from an "svn log" command and extract
 176     useful information as a list of dicts (one per log changeset).
 177     """
 178     l = []
 179     tree = ET.fromstring(xml_string)
 180     for entry in tree.findall('logentry'):
 181         d = {}
 182         d['revision'] = int(entry.get('revision'))
 183         # Some revisions don't have authors, most notably
 184         # the first revision in a repository.
 185         author = entry.find('author')
 186         d['author'] = author is not None and author.text or None
 187         d['date'] = svn_date_to_timestamp(entry.find('date').text)
 188         # Some revisions may have empty commit message
 189         message = entry.find('msg')
 190         message = message is not None and message.text is not None \
 191                         and message.text.strip() or ""
 192         # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
 193         d['message'] = message.replace('\r\n', '\n').replace('\n\r', '\n'). \
 194                                replace('\r', '\n')
 195         paths = []
 196         for path in entry.findall('.//path'):
 197             copyfrom_rev = path.get('copyfrom-rev')
 198             if copyfrom_rev:
 199                 copyfrom_rev = int(copyfrom_rev)
 200             paths.append({
 201                 'path': path.text,
 202                 'kind': path.get('kind'),
 203                 'action': path.get('action'),
 204                 'copyfrom_path': path.get('copyfrom-path'),
 205                 'copyfrom_revision': copyfrom_rev,
 206             })
 207         # Need to sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
 208         # can process actions in depth-first order.
 209         d['changed_paths'] = sorted(paths, key=itemgetter('path'))
 210         l.append(d)
 211     return l
 212
 213 def parse_svn_status_xml(xml_string, base_dir=None):
 214     """
 215     Parse the XML output from an "svn status" command and extract
 216     useful info as a list of dicts (one per status entry).
 217     """
 218     l = []
 219     tree = ET.fromstring(xml_string)
 220     for entry in tree.findall('.//entry'):
 221         d = {}
 222         path = entry.get('path')
 223         if base_dir is not None:
 224             assert path.startswith(base_dir)
 225             path = path[len(base_dir):].lstrip('/\\')
 226         d['path'] = path
 227         wc_status = entry.find('wc-status')
 228         if wc_status.get('item') == 'external':
 229             d['type'] = 'external'
 230         # TODO: Optionally check wc_status.get('item') == 'deleted' and return type='unversioned'?
 231         elif wc_status.get('revision') is not None:
 232             d['type'] = 'normal'
 233         else:
 234             d['type'] = 'unversioned'
 235         l.append(d)
 236     return l
 237
 238 def get_svn_info(svn_url_or_wc, rev_number=None):
 239     """
 240     Get SVN information for the given URL or working copy,
 241     with an optionally specified revision number.
 242     Returns a dict as created by parse_svn_info_xml().
 243     """
 244     if rev_number is not None:
 245         args = [svn_url_or_wc + "@" + str(rev_number)]
 246     else:
 247         args = [svn_url_or_wc]
 248     xml_string = run_svn(svn_info_args + args, fail_if_stderr=True)
 249     return parse_svn_info_xml(xml_string)
 250
 251 def svn_checkout(svn_url, checkout_dir, rev_number=None):
 252     """
 253     Checkout the given URL at an optional revision number.
 254     """
 255     args = []
 256     if rev_number is not None:
 257         args += ['-r', rev_number]
 258     args += [svn_url, checkout_dir]
 259     return run_svn(svn_checkout_args + args)
 260
 261 def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True):
 262     """
 263     Fetch up to 'limit' SVN log entries between the given revisions.
 264     """
 265     if stop_on_copy:
 266         args = ['--stop-on-copy']
 267     else:
 268         args = []
 269     url = str(svn_url_or_wc)
 270     if rev_start != 'HEAD' and rev_end != 'HEAD':
 271         args += ['-r', '%s:%s' % (rev_start, rev_end)]
 272         if not "@" in svn_url_or_wc:
 273             url += "@" + str(max(rev_start, rev_end))
 274     if get_changed_paths:
 275         args += ['-v']
 276     args += ['--limit', str(limit), url]
 277     xml_string = run_svn(svn_log_args + args)
 278     return parse_svn_log_xml(xml_string)
 279
 280 def get_svn_status(svn_wc, flags=None):
 281     """
 282     Get SVN status information about the given working copy.
 283     """
 284     # Ensure proper stripping by canonicalizing the path
 285     svn_wc = os.path.abspath(svn_wc)
 286     args = []
 287     if flags:
 288         args += [flags]
 289     args += [svn_wc]
 290     xml_string = run_svn(svn_status_args + args)
 291     return parse_svn_status_xml(xml_string, svn_wc)
 292
 293 def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True):
 294     """
 295     Get the first SVN log entry in the requested revision range.
 296     """
 297     entries = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths)
 298     if not entries:
 299         display_error("No SVN log for %s between revisions %s and %s" %
 300                       (svn_url, rev_start, rev_end))
 301
 302     return entries[0]
 303
 304 def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
 305     """
 306     Get the first log entry after/at the given revision number in an SVN branch.
 307     By default the revision number is set to 0, which will give you the log
 308     entry corresponding to the branch creaction.
 309
 310     NOTE: to know whether the branch creation corresponds to an SVN import or
 311     a copy from another branch, inspect elements of the 'changed_paths' entry
 312     in the returned dictionary.
 313     """
 314     return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True, get_changed_paths=True)
 315
 316 def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
 317     """
 318     Get the last log entry before/at the given revision number in an SVN branch.
 319     By default the revision number is set to HEAD, which will give you the log
 320     entry corresponding to the latest commit in branch.
 321     """
 322     return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True, get_changed_paths=True)
 323
 324
# Tuning values for iter_svn_log_entries().
# Request duration in seconds: a chunk fetched faster than this doubles the
# next chunk length; one slower than twice this halves it.
log_duration_threshold = 10.0
# Initial, and smallest allowed, number of log entries requested per chunk.
log_min_chunk_length = 10
 327
 328 def iter_svn_log_entries(svn_url, first_rev, last_rev):
 329     """
 330     Iterate over SVN log entries between first_rev and last_rev.
 331
 332     This function features chunked log fetching so that it isn't too nasty
 333     to the SVN server if many entries are requested.
 334     """
 335     cur_rev = first_rev
 336     chunk_length = log_min_chunk_length
 337     chunk_interval_factor = 1.0
 338     while last_rev == "HEAD" or cur_rev <= last_rev:
 339         start_t = time.time()
 340         stop_rev = min(last_rev, cur_rev + int(chunk_length * chunk_interval_factor))
 341         entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length)
 342         duration = time.time() - start_t
 343         if not entries:
 344             if stop_rev == last_rev:
 345                 break
 346             cur_rev = stop_rev + 1
 347             chunk_interval_factor *= 2.0
 348             continue
 349         for e in entries:
 350             yield e
 351         cur_rev = e['revision'] + 1
 352         # Adapt chunk length based on measured request duration
 353         if duration < log_duration_threshold:
 354             chunk_length = int(chunk_length * 2.0)
 355         elif duration > log_duration_threshold * 2:
 356             chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
 357
 358 def commit_from_svn_log_entry(entry, files=None, keep_author=False):
 359     """
 360     Given an SVN log entry and an optional sequence of files, do an svn commit.
 361     """
 362     # TODO: Run optional external shell hook here, for doing pre-commit filtering
 363     # This will use the local timezone for displaying commit times
 364     timestamp = int(entry['date'])
 365     svn_date = str(datetime.fromtimestamp(timestamp))
 366     # Uncomment this one one if you prefer UTC commit times
 367     #svn_date = "%d 0" % timestamp
 368     if keep_author:
 369         options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date, "--username", entry['author']]
 370     else:
 371         options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date + "\nAuthor: " + entry['author']]
 372     if files:
 373         options += list(files)
 374     print "(Committing source rev #"+str(entry['revision'])+"...)"
 375     run_svn(options)
 376
 377 def in_svn(p):
 378     """
 379     Check if a given file/folder is being tracked by Subversion.
 380     Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
 381     With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of the working-copy.
 382     Use "svn status" to check the status of the file/folder.
 383     """
 384     # TODO: Is there a better way to do this?
 385     entries = get_svn_status(p)
 386     if not entries:
 387       return False
 388     d = entries[0]
 389     return (d['type'] == 'normal')
 390
 391 def find_svn_ancestors(source_repos_url, source_base, source_offset, copyfrom_path, copyfrom_rev):
 392     """
 393     Given a copy-from path (copyfrom_path), walk the SVN history backwards to inspect
 394     the ancestory of that path. Build a collection of copyfrom_path+revision pairs
 395     for each of the branch-copies since the initial branch-creation.  If we find a
 396     copyfrom_path which source_base is a substring match of (e.g. we crawled back to
 397     the initial branch-copy from trunk), then return the collection of ancestor paths.
 398     Otherwise, copyfrom_path has no ancestory compared to source_base.
 399
 400     This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
 401     file/folder was renamed in a branch and then that branch was merged back to trunk.
 402
 403     PARAMETERS:
 404     * source_repos_url = Full URL to root of repository, e.g. 'file:///path/to/repos'
 405     * source_base = e.g. '/trunk'
 406     * source_offset = e.g. 'projectA/file1.txt'
 407     * copyfrom_path = e.g. '/branches/bug123/projectA/file1.txt'
 408     """
 409
 410     done = False
 411     working_path = copyfrom_path
 412     working_base = copyfrom_path[:-len(source_offset)].rstrip('/')
 413     working_offset = source_offset.strip('/')
 414     working_rev = copyfrom_rev
 415     ancestors = [{'path': [working_base, working_offset], 'revision': working_rev}]
 416     while not done:
 417         # Get the first "svn log" entry for this path (relative to @rev)
 418         #working_path = working_base + "/" + working_offset
 419         if debug:
 420             print ">> find_svn_ancestors: " + source_repos_url + working_path + "@" + str(working_rev) + \
 421                    "  (" + working_base + " " + working_offset + ")"
 422         log_entry = get_first_svn_log_entry(source_repos_url + working_path + "@" + str(working_rev), 1, str(working_rev), True)
 423         if not log_entry:
 424             done = True
 425         # Find the action for our working_path in this revision
 426         for d in log_entry['changed_paths']:
 427             path = d['path']
 428             if not path in working_path:
 429                 continue
 430             # Check action-type for this file
 431             action = d['action']
 432             if action not in 'MARD':
 433                 display_error("In SVN rev. %d: action '%s' not supported. \
 434                                Please report a bug!" % (log_entry['revision'], action))
 435             if debug:
 436                 debug_desc = ": " + action + " " + path
 437                 if d['copyfrom_path']:
 438                     debug_desc += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
 439                 print debug_desc
 440
 441             if action == 'R':
 442                 # If file/folder was replaced, it has no ancestor
 443                 return []
 444             if action == 'D':
 445                 # If file/folder was deleted, it has no ancestor
 446                 return []
 447             if action == 'A':
 448                 # If file/folder was added but not a copy, it has no ancestor
 449                 if not d['copyfrom_path']:
 450                     return []
 451                 # Else, file/folder was added and is a copy, so check ancestors
 452                 path_old = d['copyfrom_path']
 453                 working_path = working_path.replace(path, path_old)
 454                 if working_base in working_path:
 455                     # If the new and old working_path share the same working_base, just need to update working_offset.
 456                     working_offset = working_path[len(working_base)+1:]
 457                 else:
 458                     # Else, assume that working_base has changed but working_offset is the same, e.g. a re-branch.
 459                     # TODO: Is this a safe assumption?!
 460                     working_base = working_path[:-len(working_offset)].rstrip('/')
 461                 working_rev = d['copyfrom_revision']
 462                 if debug:
 463                     print ">> find_svn_ancestors: copy-from: " + working_base + " " + working_offset + "@" + str(working_rev)
 464                 ancestors.append({'path': [working_base, working_offset], 'revision': working_rev})
 465                 # If we found a copy-from case which matches our source_base, we're done
 466                 if (path_old == source_base) or (path_old.startswith(source_base + "/")):
 467                     return ancestors
 468                 # Else, follow the copy and keep on searching
 469                 break
 470     return None
 471
 472 def replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url):
 473     """
 474     Given an array of ancestor info (find_svn_ancestors), replay the history
 475     to correctly track renames ("svn copy/move") across branch-merges.
 476
 477     For example, consider a sequence of events like this:
 478     1. svn copy /trunk /branches/fix1
 479     2. (Make some changes on /branches/fix1)
 480     3. svn copy /branches/fix1/Proj1 /branches/fix1/Proj2  " Rename folder
 481     4. svn copy /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt  " Rename file inside renamed folder
 482     5. svn co /trunk && svn merge /branches/fix1
 483     After the merge and commit, "svn log -v" with show a delete of /trunk/Proj1
 484     and and add of /trunk/Proj2 comp-from /branches/fix1/Proj2. If we were just
 485     to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
 486     we'd lose the logical history that Proj2/file2.txt is really a descendant
 487     of Proj1/file1.txt.
 488
 489     'source_repos_url' is the full URL to the root of the source repository.
 490     'ancestors' is the array returned by find_svn_ancestors() with the final
 491       destination info appended to it by process_svn_log_entry().
 492     'dest_path'
 493     """
 494     # Ignore ancestors[0], which is the original (pre-branch-copy) trunk path
 495     # Ignore ancestors[1], which is the original branch-creation commit
 496     # Ignore ancestors[n], which is the final commit back to trunk
 497     for idx in range(1, len(ancestors)-1):
 498         ancestor = ancestors[idx]
 499         source_base = ancestor['path'][0]
 500         source_offset = ancestor['path'][1]
 501         source_path = source_base + "/" + source_offset
 502         source_rev = ancestor['revision']
 503         source_rev_next = ancestors[idx+1]['revision']
 504         # Do a "svn log" on the _parent_ directory of source_path, since trying to get log info
 505         # for the "old path" on the revision where the copy/move happened will fail.
 506         if "/" in source_path:
 507             p_source_path = source_path[:source_path.rindex('/')]
 508         else:
 509             p_source_path = ""
 510         if debug:
 511             print ">> replay_svn_ancestors: ["+str(idx)+"]" + source_path+"@"+str(source_rev) + "  ["+p_source_path+"@"+str(source_rev)+":"+str(source_rev_next-1)+"]"
 512         it_log_entries = iter_svn_log_entries(source_repos_url+p_source_path, source_rev, source_rev_next-1)
 513         for log_entry in it_log_entries:
 514             #print ">> replay_svn_ancestors: log_entry: (" + source_repos_url+source_base + ")"
 515             #print log_entry
 516             # TODO: Hit a problem case with a rename-situation where the "remove" was committed ahead of the "add (copy)".
 517             #       Do we maybe need to buffer all the remove's until the end of the entire replay session?
 518             #       Or can we maybe work around this by passing an explicit rev # into "svn copy"?
 519             process_svn_log_entry(log_entry, source_repos_url, source_repos_url+source_base, target_url)
 520
 521 def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url):
 522     """
 523     Process SVN changes from the given log entry.
 524     Returns array of all the paths in the working-copy that were changed,
 525     i.e. the paths which need to be "svn commit".
 526
 527     'log_entry' is the array structure built by parse_svn_log_xml().
 528     'source_repos_url' is the full URL to the root of the source repository.
 529     'source_url' is the full URL to the source path in the source repository.
 530     'target_url' is the full URL to the target path in the target repository.
 531     """
 532     # Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
 533     source_base = source_url[len(source_repos_url):]
 534     if debug:
 535         print ">> process_svn_log_entry: " + source_url + " (" + source_base + ")"
 536
 537     svn_rev = log_entry['revision']
 538     # Get current target revision, for "svn copy" support
 539     dup_info = get_svn_info(target_url)
 540     dup_rev = dup_info['revision']
 541
 542     removed_paths = []
 543     unrelated_paths = []
 544     commit_paths = []
 545
 546     for d in log_entry['changed_paths']:
 547         # Get the full path for this changed_path
 548         # e.g. u'/branches/bug123/projectA/file1.txt'
 549         path = d['path']
 550         if not path.startswith(source_base + "/"):
 551             # Ignore changed files that are not part of this subdir
 552             if path != source_base:
 553                 print ">> process_svn_log_entry: Unrelated path: " + path + "  (" + source_base + ")"
 554                 unrelated_paths.append(path)
 555             continue
 556         # Calculate the offset (based on source_base) for this changed_path
 557         # e.g. u'projectA/file1.txt'
 558         # (path = source_base + "/" + path_offset)
 559         path_offset = path[len(source_base):].strip("/")
 560         # Get the action for this path
 561         action = d['action']
 562         if action not in 'MARD':
 563             display_error("In SVN rev. %d: action '%s' not supported. \
 564                            Please report a bug!" % (svn_rev, action))
 565
 566         # Try to be efficient and keep track of an explicit list of paths in the
 567         # working copy that changed. If we commit from the root of the working copy,
 568         # then SVN needs to crawl the entire working copy looking for pending changes.
 569         # But, if we gather too many paths to commit, then we wipe commit_paths below
 570         # and end-up doing a commit at the root of the working-copy.
 571         if len (commit_paths) < 100:
 572             commit_paths.append(path_offset)
 573
 574         # Special-handling for replace's
 575         is_replace = False
 576         if action == 'R':
 577             if svnlog_verbose:
 578                 msg = " " + action + " " + d['path']
 579                 if d['copyfrom_path']:
 580                     msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
 581                 print msg
 582             # If file was "replaced" (deleted then re-added, all in same revision),
 583             # then we need to run the "svn rm" first, then change action='A'. This
 584             # lets the normal code below handle re-"svn add"'ing the files. This
 585             # should replicate the "replace".
 586             run_svn(["up", path_offset])
 587             run_svn(["remove", "--force", path_offset])
 588             action = 'A'
 589             is_replace = True
 590
 591         # Handle all the various action-types
 592         # (Handle "add" first, for "svn copy/move" support)
 593         if action == 'A':
 594             if svnlog_verbose:
 595                 msg = " " + action + " " + d['path']
 596                 if d['copyfrom_path']:
 597                     msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
 598                 print msg
 599             # Determine where to export from
 600             copyfrom_rev = svn_rev
 601             copyfrom_path = path
 602             svn_copy = False
 603             # Handle cases where this "add" was a copy from another URL in the source repos
 604             if d['copyfrom_revision']:
 605                 copyfrom_rev = d['copyfrom_revision']
 606                 copyfrom_path = d['copyfrom_path']
 607                 if debug:
 608                     print ">> process_svn_log_entry: copy-to: " + source_base + " " + path_offset
 609                 if source_base in copyfrom_path:
 610                     # If the copy-from path is inside the current working-copy, no need to check ancestry.
 611                     ancestors = []
 612                     copyfrom_path = copyfrom_path[len(source_base):].strip("/")
 613                     if debug:
 614                         print ">> process_svn_log_entry: Found copy: " + copyfrom_path+"@"+str(copyfrom_rev)
 615                     svn_copy = True
 616                 else:
 617                     ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset,
 618                                                    copyfrom_path, copyfrom_rev)
 619                 if ancestors:
 620                     # Reverse the list, so that we loop in chronological order
 621                     ancestors.reverse()
 622                     # Append the current revision
 623                     ancestors.append({'path': [source_base, path_offset], 'revision': svn_rev})
 624                     # ancestors[0] is the original (pre-branch-copy) trunk path.
 625                     # ancestors[1] is the first commit on the new branch.
 626                     copyfrom_rev =  ancestors[0]['revision']
 627                     copyfrom_base = ancestors[0]['path'][0]
 628                     copyfrom_offset = ancestors[0]['path'][1]
 629                     copyfrom_path = copyfrom_base + copyfrom_offset
 630                     if debug:
 631                         print ">> process_svn_log_entry: FOUND PARENT:"
 632                         for idx in range(0,len(ancestors)):
 633                             ancestor = ancestors[idx]
 634                             print "     ["+str(idx)+"] " + ancestor['path'][0]+" "+ancestor['path'][1]+"@"+str(ancestor['revision'])
 635                     #print ">> process_svn_log_entry: copyfrom_path (before): " + copyfrom_path + " source_base: " + source_base + " p: " + p
 636                     copyfrom_path = copyfrom_path[len(source_base):].strip("/")
 637                     #print ">> process_svn_log_entry: copyfrom_path (after): " + copyfrom_path
 638                     svn_copy = True
 639             # If this add was a copy-from, do a smart replay of the ancestors' history.
 640             if svn_copy:
 641                 if debug:
 642                     print ">> process_svn_log_entry: svn_copy: copy-from: " + copyfrom_path+"@"+str(copyfrom_rev) + "  source_base: "+source_base + "  len(ancestors): " + str(len(ancestors))
 643                 # If we don't have any ancestors, then this is just a straight "svn copy" in the current working-copy.
 644                 if not ancestors:
 645                     # ...but not if the target is already tracked, because this might run several times for the same path.
 646                     # TODO: Is there a better way to avoid recusion bugs? Maybe a collection of processed paths?
 647                     # TODO: The "not in_svn" check creates problems for action="R" cases, e.g. r18834
 648                     if (not in_svn(path_offset)) or is_replace:
 649                         if os.path.exists(copyfrom_path):
 650                             # If the copyfrom_path exists in the working-copy, do a local copy
 651                             run_svn(["copy", copyfrom_path, path_offset])
 652                         else:
 653                             # TODO: This doesn't respect copyfrom_rev at all. Found a case where file was (accidentally?)
 654                             #       deleted in one commit and restored (added copy-from) in a later commit. Do we maybe
 655                             #       need a mapping table of target_url -> source_url rev #'s, so that given a source_url
 656                             #       copyfrom_rev, we can map that to the equiv target_url rev#, so we do the "svn copy"
 657                             #       here correctly?
 658                             tmp_rev = dup_rev  # Kludge for time-being
 659                             if copyfrom_path == 'Data/Databases/DBUpdate.mdb' and copyfrom_rev == 17568:
 660                                 tmp_rev = dup_rev-10
 661                             run_svn(["copy", "-r", tmp_rev, target_url+"/"+copyfrom_path+"@"+str(tmp_rev), path_offset])
 662                 else:
 663                     if d['kind'] == 'dir':
 664                         # Replay any actions which happened to this folder from the ancestor path(s).
 665                         replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url)
 666                     else:
 667                         # Just do a straight "svn copy" for files. There isn't any kind of "dependent"
 668                         # history we might need to replay like for folders.
 669                         # TODO: Is this logic really correct? Doing a WC vs URL "svn copy" based on existence
 670                         #       of *source* location seems a bit kludgy. Should there be a running list of
 671                         #       renames during replay_svn_ancestors >> process_svn_log_entry?
 672                         if os.path.exists(copyfrom_path):
 673                             # If the copyfrom_path exists in the working-copy, do a local copy
 674                             run_svn(["copy", copyfrom_path, path_offset])
 675                         else:
 676                             # Else, could be a situation where replay_svn_ancestors() is replaying branch
 677                             # history and a copy was committed across two revisions: first the deletion
 678                             # followed by the later add. In such a case, we need to copy from HEAD (dup_rev)
 679                             # of the path in *target_url*
 680                             run_svn(["copy", "-r", dup_rev, target_url+"/"+copyfrom_path+"@"+str(dup_rev), path_offset])
 681             # Else just copy/export the files from the source repo and "svn add" them.
 682             else:
 683                 # Create (parent) directory if needed
 684                 if d['kind'] == 'dir':
 685                     p_path = path_offset
 686                 else:
 687                     p_path = os.path.dirname(path_offset).strip() or '.'
 688                 if not os.path.exists(p_path):
 689                     os.makedirs(p_path)
 690                 # Export the entire added tree.
 691                 run_svn(["export", "--force", "-r", str(copyfrom_rev),
 692                          source_repos_url + copyfrom_path + "@" + str(copyfrom_rev), path_offset])
 693                 # TODO: The "no in_svn" condition here is wrong for replace cases.
 694                 #       Added the in_svn condition here originally since "svn export" is recursive
 695                 #       but "svn log" will have an entry for each indiv file, hence we run into a
 696                 #       cannot-re-add-file-which-is-already-added issue.
 697                 if (not in_svn(path_offset)) or (is_replace):
 698                     run_svn(["add", "--parents", path_offset])
 699                 # TODO: Need to copy SVN properties from source repos
 700
 701         elif action == 'D':
 702             # Queue "svn remove" commands, to allow the action == 'A' handling the opportunity
 703             # to do smart "svn copy" handling on copy/move/renames.
 704             removed_paths.append(path_offset)
 705
 706         elif action == 'M':
 707             if svnlog_verbose:
 708                 print " " + action + " " + d['path']
 709             out = run_svn(["merge", "-c", str(svn_rev), "--non-recursive",
 710                      "--non-interactive", "--accept=theirs-full",
 711                      source_url+"/"+path_offset+"@"+str(svn_rev), path_offset])
 712
 713         else:
 714             display_error("Internal Error: pull_svn_rev: Unhandled 'action' value: '" + action + "'")
 715
 716     if removed_paths:
 717         for path_offset in removed_paths:
 718             if svnlog_verbose:
 719                 print " D " + source_base+"/"+path_offset
 720             run_svn(["remove", "--force", path_offset])
 721
 722     if unrelated_paths:
 723         print "Unrelated paths: (vs. '" + source_base + "')"
 724         print "*", unrelated_paths
 725
 726     return commit_paths
 727
 728 def pull_svn_rev(log_entry, source_repos_url, source_url, target_url, keep_author=False):
 729     """
 730     Pull SVN changes from the given log entry.
 731     Returns the new SVN revision.
 732     If an exception occurs, it will rollback to revision 'svn_rev - 1'.
 733     """
 734     ## Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
 735     #source_base = source_url[len(source_repos_url):]
 736
 737     svn_rev = log_entry['revision']
 738     print "\n(Starting source rev #"+str(svn_rev)+":)"
 739     print "r"+str(log_entry['revision']) + " | " + \
 740           log_entry['author'] + " | " + \
 741           str(datetime.fromtimestamp(int(log_entry['date'])).isoformat(' '))
 742     print log_entry['message']
 743     print "------------------------------------------------------------------------"
 744     commit_paths = process_svn_log_entry(log_entry, source_repos_url, source_url, target_url)
 745
 746     # If we had too many individual paths to commit, wipe the list and just commit at
 747     # the root of the working copy.
 748     if len (commit_paths) > 99:
 749         commit_paths = []
 750
 751     # TODO: Use SVN properties to track source URL + rev in the target repo?
 752     #       This would provide a more reliable resume-support
 753     try:
 754         commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
 755     except ExternalCommandFailed:
 756         # try to ignore the Properties conflicts on files and dirs
 757         # use the copy from original_wc
 758         # TODO: Need to re-work this?
 759         #has_Conflict = False
 760         #for d in log_entry['changed_paths']:
 761         #    p = d['path']
 762         #    p = p[len(source_base):].strip("/")
 763         #    if os.path.isfile(p):
 764         #        if os.path.isfile(p + ".prej"):
 765         #            has_Conflict = True
 766         #            shutil.copy(original_wc + os.sep + p, p)
 767         #            p2=os.sep + p.replace('_', '__').replace('/', '_') \
 768         #                      + ".prej-" + str(svn_rev)
 769         #            shutil.move(p + ".prej", os.path.dirname(original_wc) + p2)
 770         #            w="\n### Properties conflicts ignored:"
 771         #            print "%s %s, in revision: %s\n" % (w, p, svn_rev)
 772         #    elif os.path.isdir(p):
 773         #        if os.path.isfile(p + os.sep + "dir_conflicts.prej"):
 774         #            has_Conflict = True
 775         #            p2=os.sep + p.replace('_', '__').replace('/', '_') \
 776         #                      + "_dir__conflicts.prej-" + str(svn_rev)
 777         #            shutil.move(p + os.sep + "dir_conflicts.prej",
 778         #                        os.path.dirname(original_wc) + p2)
 779         #            w="\n### Properties conflicts ignored:"
 780         #            print "%s %s, in revision: %s\n" % (w, p, svn_rev)
 781         #            out = run_svn(["propget", "svn:ignore",
 782         #                           original_wc + os.sep + p])
 783         #            if out:
 784         #                run_svn(["propset", "svn:ignore", out.strip(), p])
 785         #            out = run_svn(["propget", "svn:externel",
 786         #                           original_wc + os.sep + p])
 787         #            if out:
 788         #                run_svn(["propset", "svn:external", out.strip(), p])
 789         ## try again
 790         #if has_Conflict:
 791         #    commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
 792         #else:
 793             raise ExternalCommandFailed
 794     print "(Finished source rev #"+str(svn_rev)+")"
 795
 796
 797 def main():
 798     usage = "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
 799     parser = OptionParser(usage)
 800     parser.add_option("-a", "--keep-author", action="store_true",
 801                       dest="keep_author", help="Keep revision Author or not")
 802     parser.add_option("-c", "--continue-from-break", action="store_true",
 803                       dest="cont_from_break",
 804                       help="Continue from previous break")
 805     parser.add_option("-r", "--svn-rev", type="int", dest="svn_rev",
 806                       help="SVN revision to checkout from")
 807     (options, args) = parser.parse_args()
 808     if len(args) != 2:
 809         display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
 810                       False)
 811
 812     source_url = args.pop(0).rstrip("/")
 813     target_url = args.pop(0).rstrip("/")
 814     if options.keep_author:
 815         keep_author = True
 816     else:
 817         keep_author = False
 818
 819     # Find the greatest_rev in the source repo
 820     svn_info = get_svn_info(source_url)
 821     greatest_rev = svn_info['revision']
 822
 823     dup_wc = "_dup_wc"
 824
 825     # if old working copy does not exist, disable continue mode
 826     # TODO: Better continue support. Maybe include source repo's rev # in target commit info?
 827     if not os.path.exists(dup_wc):
 828         options.cont_from_break = False
 829
 830     if not options.cont_from_break:
 831         # Warn if Target SVN URL existed
 832         cmd = find_program("svn")
 833         pipe = Popen([cmd] + ["list"] + [target_url], executable=cmd,
 834                      stdout=PIPE, stderr=PIPE)
 835         out, err = pipe.communicate()
 836         if pipe.returncode == 0:
 837             print "Target SVN URL: %s existed!" % target_url
 838             if out:
 839                 print out
 840             print "Press 'Enter' to Continue, 'Ctrl + C' to Cancel..."
 841             print "(Timeout in 5 seconds)"
 842             rfds, wfds, efds = select.select([sys.stdin], [], [], 5)
 843
 844         # Get log entry for the SVN revision we will check out
 845         if options.svn_rev:
 846             # If specify a rev, get log entry just before or at rev
 847             svn_start_log = get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
 848         else:
 849             # Otherwise, get log entry of branch creation
 850             # TODO: This call is *very* expensive on a repo with lots of revisions.
 851             #       Even though the call is passing --limit 1, it seems like that limit-filter
 852             #       is happening after SVN has fetched the full log history.
 853             svn_start_log = get_first_svn_log_entry(source_url, 1, greatest_rev, False)
 854
 855         # This is the revision we will start from for source_url
 856         svn_rev = svn_start_log['revision']
 857
 858         # Check out a working copy of target_url
 859         dup_wc = os.path.abspath(dup_wc)
 860         if os.path.exists(dup_wc):
 861             shutil.rmtree(dup_wc)
 862         svn_checkout(target_url, dup_wc)
 863         os.chdir(dup_wc)
 864
 865         # For the initial commit to the target URL, export all the contents from
 866         # the source URL at the start-revision.
 867         paths = run_svn(["list", "-r", str(svn_rev), source_url+"@"+str(svn_rev)])
 868         paths = paths.strip("\n").split("\n")
 869         for path in paths:
 870             if not path:
 871                 # Skip null lines
 872                 break
 873             # Directories have a trailing slash in the "svn list" output
 874             if path[-1] == "/":
 875                 path=path.rstrip('/')
 876                 if not os.path.exists(path):
 877                     os.makedirs(path)
 878             run_svn(["export", "--force", "-r" , str(svn_rev), source_url+"/"+path+"@"+str(svn_rev), path])
 879             run_svn(["add", path])
 880         commit_from_svn_log_entry(svn_start_log, [], keep_author)
 881     else:
 882         dup_wc = os.path.abspath(dup_wc)
 883         os.chdir(dup_wc)
 884         # TODO: Need better resume support. For the time being, expect caller explictly passes in resume revision.
 885         svn_rev = options.svn_rev
 886         if svn_rev < 1:
 887             display_error("Invalid arguments\n\nNeed to pass result rev # (-r) when using continue-mode (-c)", False)
 888
 889
 890     # Get SVN info
 891     svn_info = get_svn_info(source_url)
 892     # Get the base URL for the source repos, e.g. u'svn://svn.example.com/svn/repo'
 893     source_repos_url = svn_info['repos_url']
 894
 895     # Load SVN log starting from svn_rev + 1
 896     it_log_entries = iter_svn_log_entries(source_url, svn_rev + 1, greatest_rev)
 897
 898     try:
 899         for log_entry in it_log_entries:
 900             # Replay this revision from source_url into target_url
 901             pull_svn_rev(log_entry, source_repos_url, source_url, target_url, keep_author)
 902             # Update our target working-copy, to ensure everything says it's at the new HEAD revision
 903             run_svn(["up", dup_wc])
 904
 905     except KeyboardInterrupt:
 906         print "\nStopped by user."
 907         run_svn(["cleanup"])
 908         run_svn(["revert", "--recursive", "."])
 909     except:
 910         print "\nCommand failed with following error:\n"
 911         traceback.print_exc()
 912         run_svn(["cleanup"])
 913         run_svn(["revert", "--recursive", "."])
 914     finally:
 915         run_svn(["up"])
 916         print "\nFinished!"
 917
 918
# Run the replay only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
 921
 922 # vim:sts=4:sw=4: