1 #!/usr/bin/env python
2 """
3 svn2svn.py
4
5 Replicate (replay) changesets from one SVN repository to another:
6 * Maintains full logical history (e.g. uses "svn copy" for renames).
7 * Maintains original commit messages.
8 * Cannot maintain original commit dates, but appends the original commit date
9 to each commit message: "Date: %d".
10 * Optionally maintain source author info. (Only supported if accessing
11 target SVN repo via file://)
12 * Optionally run an external shell script before each replayed commit
13 to give the ability to dynamically exclude or modify files as part
14 of the replay.
15
16 License: GPLv2, the same as hgsvn.
17 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
18 (This is a forked and modified version of http://code.google.com/p/svn2svn/)
19 """
20
21 import os
22 import sys
23 import time
24 import locale
25 import shutil
26 import select
27 import calendar
28 import traceback
29
30 from optparse import OptionParser
31 from subprocess import Popen, PIPE
32 from datetime import datetime
33
34 try:
35 from xml.etree import cElementTree as ET
36 except ImportError:
37 try:
38 from xml.etree import ElementTree as ET
39 except ImportError:
40 try:
41 import cElementTree as ET
42 except ImportError:
43 from elementtree import ElementTree as ET
44
45 svn_log_args = ['log', '--xml']
46 svn_info_args = ['info', '--xml']
47 svn_checkout_args = ['checkout', '-q']
48 svn_status_args = ['status', '--xml', '-v', '--ignore-externals']
49
50 # Setup debug options
51 debug = False
52 debug_runsvn_timing = False # Display how long each "svn" OS command took to run?
53 # Setup verbosity options
54 runsvn_showcmd = False # Display every "svn" OS command we run?
55 runsvn_showout = False # Display the stdout results from every "svn" OS command we run?
56 svnlog_verbose = True # Display each action + changed-path as we walk the history?
57
58 # define exception class
59 class ExternalCommandFailed(RuntimeError):
60 """
61 An external command failed.
62 """
63
64 def display_error(message, raise_exception = True):
65 """
66 Display error message, then terminate.
67 """
68 print "Error:", message
69 print
70 if raise_exception:
71 raise ExternalCommandFailed
72 else:
73 sys.exit(1)
74
75 # Windows compatibility code by Bill Baxter
76 if os.name == "nt":
77 def find_program(name):
78 """
79 Find the name of the program for Popen.
80 Windows is finicky about having the complete file name. Popen
81 won't search the %PATH% for you automatically.
82 (Adapted from ctypes.find_library)
83 """
84 # See MSDN for the REAL search order.
85 base, ext = os.path.splitext(name)
86 if ext:
87 exts = [ext]
88 else:
89 exts = ['.bat', '.exe']
90 for directory in os.environ['PATH'].split(os.pathsep):
91 for e in exts:
92 fname = os.path.join(directory, base + e)
93 if os.path.exists(fname):
94 return fname
95 return None
96 else:
97 def find_program(name):
98 """
99 Find the name of the program for Popen.
100 On Unix, popen isn't picky about having absolute paths.
101 """
102 return name
103
104 def shell_quote(s):
105 if os.name == "nt":
106 q = '"'
107 else:
108 q = "'"
109 return q + s.replace('\\', '\\\\').replace("'", "'\"'\"'") + q
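# Illustrative example of the quoting above (POSIX branch; the input string is
# made up): shell_quote("it's") returns 'it'"'"'s' , which a shell parses back
# to the original it's .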
110
111 locale_encoding = locale.getpreferredencoding()
112
113 def run_svn(args, fail_if_stderr=False, encoding="utf-8"):
114 """
115 Run the given svn command in a subprocess (via PIPE).
116 Exit if the svn command fails.
117 """
118 def _transform_arg(a):
119 if isinstance(a, unicode):
120 a = a.encode(encoding or locale_encoding)
121 elif not isinstance(a, str):
122 a = str(a)
123 return a
124 t_args = map(_transform_arg, args)
125
126 cmd = find_program("svn")
127 cmd_string = str(" ".join(map(shell_quote, [cmd] + t_args)))
128 if runsvn_showcmd:
129 print "$", "("+os.getcwd()+")", cmd_string
130 if debug_runsvn_timing:
131 time1 = time.time()
132 pipe = Popen([cmd] + t_args, executable=cmd, stdout=PIPE, stderr=PIPE)
133 out, err = pipe.communicate()
134 if debug_runsvn_timing:
135 time2 = time.time()
136 print "(" + str(round(time2-time1,4)) + " elapsed)"
137 if out and runsvn_showout:
138 print out
139 if pipe.returncode != 0 or (fail_if_stderr and err.strip()):
140 display_error("External program failed (return code %d): %s\n%s"
141 % (pipe.returncode, cmd_string, err))
142 return out
143
144 def svn_date_to_timestamp(svn_date):
145 """
146 Parse an SVN date as read from the XML output and
147 return the corresponding timestamp.
148 """
149 # Strip microseconds and timezone (always UTC, hopefully)
150 # XXX there are various ISO datetime parsing routines out there,
151 # cf. http://seehuhn.de/comp/pdate
152 date = svn_date.split('.', 2)[0]
153 time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
154 return calendar.timegm(time_tuple)
155
156 def parse_svn_info_xml(xml_string):
157 """
158 Parse the XML output from an "svn info" command and extract
159 useful information as a dict.
160 """
161 d = {}
162 tree = ET.fromstring(xml_string)
163 entry = tree.find('.//entry')
164 if entry:
165 d['url'] = entry.find('url').text
166 d['revision'] = int(entry.get('revision'))
167 d['repos_url'] = tree.find('.//repository/root').text
168 d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
169 d['kind'] = entry.get('kind')
170 return d
171
172 def parse_svn_log_xml(xml_string):
173 """
174 Parse the XML output from an "svn log" command and extract
175 useful information as a list of dicts (one per log changeset).
176 """
177 l = []
178 tree = ET.fromstring(xml_string)
179 for entry in tree.findall('logentry'):
180 d = {}
181 d['revision'] = int(entry.get('revision'))
182 # Some revisions don't have authors, most notably
183 # the first revision in a repository.
184 author = entry.find('author')
185 d['author'] = author is not None and author.text or None
186 d['date'] = svn_date_to_timestamp(entry.find('date').text)
187 # Some revisions may have empty commit message
188 message = entry.find('msg')
189 message = message is not None and message.text is not None \
190 and message.text.strip() or ""
191 # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
192 d['message'] = message.replace('\r\n', '\n').replace('\n\r', '\n'). \
193 replace('\r', '\n')
194 paths = d['changed_paths'] = []
195 for path in entry.findall('.//path'):
196 copyfrom_rev = path.get('copyfrom-rev')
197 if copyfrom_rev:
198 copyfrom_rev = int(copyfrom_rev)
199 paths.append({
200 'path': path.text,
201 'kind': path.get('kind'),
202 'action': path.get('action'),
203 'copyfrom_path': path.get('copyfrom-path'),
204 'copyfrom_revision': copyfrom_rev,
205 })
206 # Need to sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
207 # can process actions in depth-first order.
208 paths.sort()
209 l.append(d)
210 return l
211
212 def parse_svn_status_xml(xml_string, base_dir=None):
213 """
214 Parse the XML output from an "svn status" command and extract
215 useful info as a list of dicts (one per status entry).
216 """
217 l = []
218 tree = ET.fromstring(xml_string)
219 for entry in tree.findall('.//entry'):
220 d = {}
221 path = entry.get('path')
222 if base_dir is not None:
223 assert path.startswith(base_dir)
224 path = path[len(base_dir):].lstrip('/\\')
225 d['path'] = path
226 wc_status = entry.find('wc-status')
227 if wc_status.get('item') == 'external':
228 d['type'] = 'external'
229 elif wc_status.get('revision') is not None:
230 d['type'] = 'normal'
231 else:
232 d['type'] = 'unversioned'
233 l.append(d)
234 return l
235
236 def get_svn_info(svn_url_or_wc, rev_number=None):
237 """
238 Get SVN information for the given URL or working copy,
239 with an optionally specified revision number.
240 Returns a dict as created by parse_svn_info_xml().
241 """
242 if rev_number is not None:
243 args = [svn_url_or_wc + "@" + str(rev_number)]
244 else:
245 args = [svn_url_or_wc]
246 xml_string = run_svn(svn_info_args + args, fail_if_stderr=True)
247 return parse_svn_info_xml(xml_string)
248
249 def svn_checkout(svn_url, checkout_dir, rev_number=None):
250 """
251 Checkout the given URL at an optional revision number.
252 """
253 args = []
254 if rev_number is not None:
255 args += ['-r', rev_number]
256 args += [svn_url, checkout_dir]
257 return run_svn(svn_checkout_args + args)
258
259 def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True):
260 """
261 Fetch up to 'limit' SVN log entries between the given revisions.
262 """
263 if stop_on_copy:
264 args = ['--stop-on-copy']
265 else:
266 args = []
267 url = str(svn_url_or_wc)
268 if rev_start != 'HEAD' and rev_end != 'HEAD':
269 args += ['-r', '%s:%s' % (rev_start, rev_end)]
270 if not "@" in svn_url_or_wc:
271 url += "@" + str(max(rev_start, rev_end))
272 if get_changed_paths:
273 args += ['-v']
274 args += ['--limit', str(limit), url]
275 xml_string = run_svn(svn_log_args + args)
276 return parse_svn_log_xml(xml_string)
277
278 def get_svn_status(svn_wc, flags=None):
279 """
280 Get SVN status information about the given working copy.
281 """
282 # Ensure proper stripping by canonicalizing the path
283 svn_wc = os.path.abspath(svn_wc)
284 args = []
285 if flags:
286 args += [flags]
287 args += [svn_wc]
288 xml_string = run_svn(svn_status_args + args)
289 return parse_svn_status_xml(xml_string, svn_wc)
290
291 def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True):
292 """
293 Get the first SVN log entry in the requested revision range.
294 """
295 entries = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths)
296 if not entries:
297 display_error("No SVN log for %s between revisions %s and %s" %
298 (svn_url, rev_start, rev_end))
299
300 return entries[0]
301
302 def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
303 """
304 Get the first log entry after/at the given revision number in an SVN branch.
305 By default the revision number is set to 0, which will give you the log
306 entry corresponding to the branch creation.
307
308 NOTE: to know whether the branch creation corresponds to an SVN import or
309 a copy from another branch, inspect elements of the 'changed_paths' entry
310 in the returned dictionary.
311 """
312 return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True, get_changed_paths=True)
313
314 def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
315 """
316 Get the last log entry before/at the given revision number in an SVN branch.
317 By default the revision number is set to HEAD, which will give you the log
318 entry corresponding to the latest commit in the branch.
319 """
320 return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True, get_changed_paths=True)
321
322
323 log_duration_threshold = 10.0
324 log_min_chunk_length = 10
325
326 def iter_svn_log_entries(svn_url, first_rev, last_rev):
327 """
328 Iterate over SVN log entries between first_rev and last_rev.
329
330 This function features chunked log fetching so that it isn't too nasty
331 to the SVN server if many entries are requested.
332 """
333 cur_rev = first_rev
334 chunk_length = log_min_chunk_length
335 chunk_interval_factor = 1.0
336 while last_rev == "HEAD" or cur_rev <= last_rev:
337 start_t = time.time()
338 stop_rev = min(last_rev, cur_rev + int(chunk_length * chunk_interval_factor))
339 entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length)
340 duration = time.time() - start_t
341 if not entries:
342 if stop_rev == last_rev:
343 break
344 cur_rev = stop_rev + 1
345 chunk_interval_factor *= 2.0
346 continue
347 for e in entries:
348 yield e
349 cur_rev = e['revision'] + 1
350 # Adapt chunk length based on measured request duration
351 if duration < log_duration_threshold:
352 chunk_length = int(chunk_length * 2.0)
353 elif duration > log_duration_threshold * 2:
354 chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
355
356 def commit_from_svn_log_entry(entry, files=None, keep_author=False):
357 """
358 Given an SVN log entry and an optional sequence of files, do an svn commit.
359 """
360 # TODO: Run optional external shell hook here, for doing pre-commit filtering
361 # This will use the local timezone for displaying commit times
362 timestamp = int(entry['date'])
363 svn_date = str(datetime.fromtimestamp(timestamp))
364 # Uncomment this one if you prefer UTC commit times
365 #svn_date = "%d 0" % timestamp
366 if keep_author:
367 options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date, "--username", entry['author']]
368 else:
369 options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date + "\nAuthor: " + entry['author']]
370 if files:
371 options += list(files)
372 print "(Committing source rev #"+str(entry['revision'])+"...)"
373 run_svn(options)
374
375 def in_svn(p):
376 """
377 Check if a given file/folder is being tracked by Subversion.
378 Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
379 With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of the working-copy.
380 Use "svn status" to check the status of the file/folder.
381 """
382 # TODO: Is there a better way to do this?
383 entries = get_svn_status(p)
384 if not entries:
385 return False
386 d = entries[0]
387 return (d['type'] == 'normal')
388
389 def find_svn_ancestors(source_repos_url, source_base, source_offset, copyfrom_path, copyfrom_rev):
390 """
391 Given a copy-from path (copyfrom_path), walk the SVN history backwards to inspect
392 the ancestry of that path. Build a collection of copyfrom_path+revision pairs
393 for each of the branch-copies since the initial branch-creation. If we find a
394 copyfrom_path of which source_base is a prefix (e.g. we crawled back to
395 the initial branch-copy from trunk), then return the collection of ancestor paths.
396 Otherwise, copyfrom_path has no ancestry compared to source_base.
397
398 This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
399 file/folder was renamed in a branch and then that branch was merged back to trunk.
400
401 PARAMETERS:
402 * source_repos_url = Full URL to root of repository, e.g. 'file:///path/to/repos'
403 * source_base = e.g. '/trunk'
404 * source_offset = e.g. 'projectA/file1.txt'
405 * copyfrom_path = e.g. '/branches/bug123/projectA/file1.txt'
406 """
407
408 done = False
409 working_path = copyfrom_path
410 working_base = copyfrom_path[:-len(source_offset)].rstrip('/')
411 working_offset = source_offset.strip('/')
412 working_rev = copyfrom_rev
413 ancestors = [{'path': [working_base, working_offset], 'revision': working_rev}]
414 while not done:
415 # Get the first "svn log" entry for this path (relative to @rev)
416 #working_path = working_base + "/" + working_offset
417 if debug:
418 print ">> find_svn_ancestors: " + source_repos_url + working_path + "@" + str(working_rev) + \
419 " (" + working_base + " " + working_offset + ")"
420 log_entry = get_first_svn_log_entry(source_repos_url + working_path + "@" + str(working_rev), 1, str(working_rev), True)
421 if not log_entry:
422 done = True
423 # Find the action for our working_path in this revision
424 for d in log_entry['changed_paths']:
425 path = d['path']
426 if not path in working_path:
427 continue
428 # Check action-type for this file
429 action = d['action']
430 if action not in 'MARD':
431 display_error("In SVN rev. %d: action '%s' not supported. \
432 Please report a bug!" % (log_entry['revision'], action))
433 if debug:
434 debug_desc = ": " + action + " " + path
435 if d['copyfrom_path']:
436 debug_desc += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
437 print debug_desc
438
439 if action == 'R':
440 # If file/folder was replaced, it has no ancestor
441 return []
442 if action == 'D':
443 # If file/folder was deleted, it has no ancestor
444 return []
445 if action == 'A':
446 # If file/folder was added but not a copy, it has no ancestor
447 if not d['copyfrom_path']:
448 return []
449 # Else, file/folder was added and is a copy, so check ancestors
450 path_old = d['copyfrom_path']
451 working_path = working_path.replace(path, path_old)
452 if working_base in working_path:
453 # If the new and old working_path share the same working_base, just need to update working_offset.
454 working_offset = working_path[len(working_base)+1:]
455 else:
456 # Else, assume that working_base has changed but working_offset is the same, e.g. a re-branch.
457 # TODO: Is this a safe assumption?!
458 working_base = working_path[:-len(working_offset)].rstrip('/')
459 working_rev = d['copyfrom_revision']
460 if debug:
461 print ">> find_svn_ancestors: copy-from: " + working_base + " " + working_offset + "@" + str(working_rev)
462 ancestors.append({'path': [working_base, working_offset], 'revision': working_rev})
463 # If we found a copy-from case which matches our source_base, we're done
464 if (path_old == source_base) or (path_old.startswith(source_base + "/")):
465 return ancestors
466 # Else, follow the copy and keep on searching
467 break
468 return None
469
470 def replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url):
471 """
472 Given an array of ancestor info (find_svn_ancestors), replay the history
473 to correctly track renames ("svn copy/move") across branch-merges.
474
475 For example, consider a sequence of events like this:
476 1. svn copy /trunk /branches/fix1
477 2. (Make some changes on /branches/fix1)
478 3. svn copy /branches/fix1/Proj1 /branches/fix1/Proj2   # Rename folder
479 4. svn copy /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt   # Rename file inside renamed folder
480 5. svn co /trunk && svn merge /branches/fix1
481 After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
482 and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
483 to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
484 we'd lose the logical history that Proj2/file2.txt is really a descendant
485 of Proj1/file1.txt.
486
487 'source_repos_url' is the full URL to the root of the source repository.
488 'ancestors' is the array returned by find_svn_ancestors() with the final
489 destination info appended to it by process_svn_log_entry().
490 'source_url' and 'target_url' are the full URLs to the source/target paths.
491 """
492 # Ignore ancestors[0], which is the original (pre-branch-copy) trunk path
493 # Ignore ancestors[1], which is the original branch-creation commit
494 # Ignore ancestors[n], which is the final commit back to trunk
495 for idx in range(1, len(ancestors)-1):
496 ancestor = ancestors[idx]
497 source_base = ancestor['path'][0]
498 source_offset = ancestor['path'][1]
499 source_path = source_base + "/" + source_offset
500 source_rev = ancestor['revision']
501 source_rev_next = ancestors[idx+1]['revision']
502 # Do a "svn log" on the _parent_ directory of source_path, since trying to get log info
503 # for the "old path" on the revision where the copy/move happened will fail.
504 if "/" in source_path:
505 p_source_path = source_path[:source_path.rindex('/')]
506 else:
507 p_source_path = ""
508 if debug:
509 print ">> replay_svn_ancestors: ["+str(idx)+"]" + source_path+"@"+str(source_rev) + " ["+p_source_path+"@"+str(source_rev)+":"+str(source_rev_next-1)+"]"
510 it_log_entries = iter_svn_log_entries(source_repos_url+p_source_path, source_rev, source_rev_next-1)
511 for log_entry in it_log_entries:
512 #print ">> replay_svn_ancestors: log_entry: (" + source_repos_url+source_base + ")"
513 #print log_entry
514 # TODO: Hit a problem case with a rename-situation where the "remove" was committed ahead of the "add (copy)".
515 # Do we maybe need to buffer all the remove's until the end of the entire replay session?
516 # Or can we maybe work around this by passing an explicit rev # into "svn copy"?
517 process_svn_log_entry(log_entry, source_repos_url, source_repos_url+source_base, target_url)
518
519 def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url):
520 """
521 Process SVN changes from the given log entry.
522 Returns an array of all the paths in the working-copy that were changed,
523 i.e. the paths which need to be passed to "svn commit".
524
525 'log_entry' is the array structure built by parse_svn_log_xml().
526 'source_repos_url' is the full URL to the root of the source repository.
527 'source_url' is the full URL to the source path in the source repository.
528 'target_url' is the full URL to the target path in the target repository.
529 """
530 # Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
531 source_base = source_url[len(source_repos_url):]
532 if debug:
533 print ">> process_svn_log_entry: " + source_url + " (" + source_base + ")"
534
535 svn_rev = log_entry['revision']
536 # Get current target revision, for "svn copy" support
537 dup_info = get_svn_info(target_url)
538 dup_rev = dup_info['revision']
539
540 removed_paths = []
541 modified_paths = []
542 unrelated_paths = []
543 commit_paths = []
544
545 for d in log_entry['changed_paths']:
546 # Get the full path for this changed_path
547 # e.g. u'/branches/bug123/projectA/file1.txt'
548 path = d['path']
549 if not path.startswith(source_base + "/"):
550 # Ignore changed files that are not part of this subdir
551 if path != source_base:
552 print ">> process_svn_log_entry: Unrelated path: " + path + " (" + source_base + ")"
553 unrelated_paths.append(path)
554 continue
555 # Calculate the offset (based on source_base) for this changed_path
556 # e.g. u'projectA/file1.txt'
557 # (path = source_base + "/" + path_offset)
558 path_offset = path[len(source_base):].strip("/")
559 # Get the action for this path
560 action = d['action']
561 if action not in 'MARD':
562 display_error("In SVN rev. %d: action '%s' not supported. \
563 Please report a bug!" % (svn_rev, action))
564
565 # Try to be efficient and keep track of an explicit list of paths in the
566 # working copy that changed. If we commit from the root of the working copy,
567 # then SVN needs to crawl the entire working copy looking for pending changes.
568 # But, if we gather too many paths to commit, then we wipe commit_paths below
569 # and end-up doing a commit at the root of the working-copy.
570 if len (commit_paths) < 100:
571 commit_paths.append(path_offset)
572
573 # Special-handling for replace's
574 is_replace = False
575 if action == 'R':
576 if svnlog_verbose:
577 msg = " " + d['action'] + " " + d['path']
578 if d['copyfrom_path']:
579 msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
580 print msg
581 # If file was "replaced" (deleted then re-added, all in same revision),
582 # then we need to run the "svn rm" first, then change action='A'. This
583 # lets the normal code below handle re-"svn add"'ing the files. This
584 # should replicate the "replace".
585 run_svn(["up", path_offset])
586 run_svn(["remove", "--force", path_offset])
587 action = 'A'
588 is_replace = True
589
590 # Handle all the various action-types
591 # (Handle "add" first, for "svn copy/move" support)
592 if action == 'A':
593 if svnlog_verbose:
594 msg = " " + d['action'] + " " + d['path']
595 if d['copyfrom_path']:
596 msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
597 print msg
598 # Determine where to export from
599 copyfrom_rev = svn_rev
600 copyfrom_path = path
601 svn_copy = False
602 # Handle cases where this "add" was a copy from another URL in the source repos
603 if d['copyfrom_revision']:
604 copyfrom_rev = d['copyfrom_revision']
605 copyfrom_path = d['copyfrom_path']
606 if debug:
607 print ">> process_svn_log_entry: copy-to: " + source_base + " " + path_offset
608 if source_base in copyfrom_path:
609 # If the copy-from path is inside the current working-copy, no need to check ancestry.
610 ancestors = []
611 copyfrom_path = copyfrom_path[len(source_base):].strip("/")
612 if debug:
613 print ">> process_svn_log_entry: Found copy: " + copyfrom_path+"@"+str(copyfrom_rev)
614 svn_copy = True
615 else:
616 ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset,
617 copyfrom_path, copyfrom_rev)
618 if ancestors:
619 # Reverse the list, so that we loop in chronological order
620 ancestors.reverse()
621 # Append the current revision
622 ancestors.append({'path': [source_base, path_offset], 'revision': svn_rev})
623 # ancestors[0] is the original (pre-branch-copy) trunk path.
624 # ancestors[1] is the first commit on the new branch.
625 copyfrom_rev = ancestors[0]['revision']
626 copyfrom_base = ancestors[0]['path'][0]
627 copyfrom_offset = ancestors[0]['path'][1]
628 copyfrom_path = copyfrom_base + copyfrom_offset
629 if debug:
630 print ">> process_svn_log_entry: FOUND PARENT:"
631 for idx in range(0,len(ancestors)):
632 ancestor = ancestors[idx]
633 print " ["+str(idx)+"] " + ancestor['path'][0]+" "+ancestor['path'][1]+"@"+str(ancestor['revision'])
634 #print ">> process_svn_log_entry: copyfrom_path (before): " + copyfrom_path + " source_base: " + source_base + " p: " + p
635 copyfrom_path = copyfrom_path[len(source_base):].strip("/")
636 #print ">> process_svn_log_entry: copyfrom_path (after): " + copyfrom_path
637 svn_copy = True
638 # If this add was a copy-from, do a smart replay of the ancestors' history.
639 # Else just copy/export the files from the source repo and "svn add" them.
640 if svn_copy:
641 if debug:
642 print ">> process_svn_log_entry: svn_copy: copy-from: " + copyfrom_path+"@"+str(copyfrom_rev) + " source_base: "+source_base + " len(ancestors): " + str(len(ancestors))
643 # If we don't have any ancestors, then this is just a straight "svn copy" in the current working-copy.
644 if not ancestors:
645 # ...but not if the target is already tracked, because this might run several times for the same path.
646 # TODO: Is there a better way to avoid recursion bugs? Maybe a collection of processed paths?
647 if not in_svn(path_offset):
648 run_svn(["copy", "-r", dup_rev, target_url+"/"+copyfrom_path+"@"+str(dup_rev), path_offset])
649 else:
650 if d['kind'] == 'dir':
651 # Replay any actions which happened to this folder from the ancestor path(s).
652 replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url)
653 else:
654 # Just do a straight "svn copy" for files. There isn't any kind of "dependent"
655 # history we might need to replay like for folders.
656 run_svn(["copy", "-r", dup_rev, target_url+"/"+copyfrom_path+"@"+str(dup_rev), path_offset])
657 else:
658 # Create (parent) directory if needed
659 if d['kind'] == 'dir':
660 p_path = path_offset
661 else:
662 p_path = os.path.dirname(path_offset).strip() or '.'
663 if not os.path.exists(p_path):
664 os.makedirs(p_path)
665 # Export the entire added tree.
666 run_svn(["export", "--force", "-r", str(copyfrom_rev),
667 source_repos_url + copyfrom_path + "@" + str(copyfrom_rev), path_offset])
668 # TODO: The "no in_svn" condition here is wrong for replace cases.
669 # Added the in_svn condition here originally since "svn export" is recursive
670 # but "svn log" will have an entry for each indiv file, hence we run into a
671 # cannot-re-add-file-which-is-already-added issue.
672 if (not in_svn(path_offset)) or (is_replace):
673 run_svn(["add", "--parents", path_offset])
674 # TODO: Need to copy SVN properties from source repos
675
676 elif action == 'D':
677 # Queue "svn remove" commands, to give the action == 'A' handling the opportunity
678 # to do smart "svn copy" handling of copy/move/renames.
679 removed_paths.append(path_offset)
680
681 elif action == 'R':
682 # TODO
683 display_error("Internal Error: Handling for action='R' not implemented yet.")
684
685 elif action == 'M':
686 modified_paths.append(path_offset)
687
688 else:
689 display_error("Internal Error: pull_svn_rev: Unhandled 'action' value: '" + action + "'")
690
691 if removed_paths:
692 for r in removed_paths:
693 if svnlog_verbose:
694 print " D " + r
695 # TODO: Is the "svn up" here needed?
696 run_svn(["up", r])
697 run_svn(["remove", "--force", r])
698
699 if modified_paths:
700 for m in modified_paths:
701 if svnlog_verbose:
702 print " M " + m
703 # TODO: Is the "svn up" here needed?
704 run_svn(["up", m])
705 m_url = source_url + "/" + m
706 out = run_svn(["merge", "-c", str(svn_rev), "--non-recursive",
707 "--non-interactive", "--accept=theirs-full",
708 m_url+"@"+str(svn_rev), m])
709
710 if unrelated_paths:
711 print "Unrelated paths: (vs. '" + source_base + "')"
712 print "*", unrelated_paths
713
714 return commit_paths
715
716 def pull_svn_rev(log_entry, source_repos_url, source_url, target_url, keep_author=False):
717 """
718 Pull SVN changes from the given log entry.
719 Returns the new SVN revision.
720 If an exception occurs, it will rollback to revision 'svn_rev - 1'.
721 """
722 ## Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
723 #source_base = source_url[len(source_repos_url):]
724
725 svn_rev = log_entry['revision']
726 print "\n(Starting source rev #"+str(svn_rev)+":)"
727 print "r"+str(log_entry['revision']) + " | " + \
728 log_entry['author'] + " | " + \
729 str(datetime.fromtimestamp(int(log_entry['date'])).isoformat(' '))
730 print log_entry['message']
731 print "------------------------------------------------------------------------"
732 commit_paths = process_svn_log_entry(log_entry, source_repos_url, source_url, target_url)
733
734 # If we had too many individual paths to commit, wipe the list and just commit at
735 # the root of the working copy.
736 if len (commit_paths) > 99:
737 commit_paths = []
738
739 # TODO: Use SVN properties to track source URL + rev in the target repo?
740 # This would provide a more reliable resume-support
741 try:
742 commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
743 except ExternalCommandFailed:
744 # try to ignore the Properties conflicts on files and dirs
745 # use the copy from original_wc
746 # TODO: Need to re-work this?
747 #has_Conflict = False
748 #for d in log_entry['changed_paths']:
749 # p = d['path']
750 # p = p[len(source_base):].strip("/")
751 # if os.path.isfile(p):
752 # if os.path.isfile(p + ".prej"):
753 # has_Conflict = True
754 # shutil.copy(original_wc + os.sep + p, p)
755 # p2=os.sep + p.replace('_', '__').replace('/', '_') \
756 # + ".prej-" + str(svn_rev)
757 # shutil.move(p + ".prej", os.path.dirname(original_wc) + p2)
758 # w="\n### Properties conflicts ignored:"
759 # print "%s %s, in revision: %s\n" % (w, p, svn_rev)
760 # elif os.path.isdir(p):
761 # if os.path.isfile(p + os.sep + "dir_conflicts.prej"):
762 # has_Conflict = True
763 # p2=os.sep + p.replace('_', '__').replace('/', '_') \
764 # + "_dir__conflicts.prej-" + str(svn_rev)
765 # shutil.move(p + os.sep + "dir_conflicts.prej",
766 # os.path.dirname(original_wc) + p2)
767 # w="\n### Properties conflicts ignored:"
768 # print "%s %s, in revision: %s\n" % (w, p, svn_rev)
769 # out = run_svn(["propget", "svn:ignore",
770 # original_wc + os.sep + p])
771 # if out:
772 # run_svn(["propset", "svn:ignore", out.strip(), p])
773 # out = run_svn(["propget", "svn:externals",
774 # original_wc + os.sep + p])
775 # if out:
776 # run_svn(["propset", "svn:externals", out.strip(), p])
777 ## try again
778 #if has_Conflict:
779 # commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
780 #else:
781 raise ExternalCommandFailed
782 print "(Finished source rev #"+str(svn_rev)+")"
783
784
785 def main():
786 usage = "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
787 parser = OptionParser(usage)
788 parser.add_option("-a", "--keep-author", action="store_true",
789 dest="keep_author", help="Keep revision Author or not")
790 parser.add_option("-c", "--continue-from-break", action="store_true",
791 dest="cont_from_break",
792 help="Continue from previous break")
793 parser.add_option("-r", "--svn-rev", type="int", dest="svn_rev",
794 help="SVN revision to checkout from")
795 (options, args) = parser.parse_args()
796 if len(args) != 2:
797 display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
798 False)
799
800 source_url = args.pop(0).rstrip("/")
801 target_url = args.pop(0).rstrip("/")
802 if options.keep_author:
803 keep_author = True
804 else:
805 keep_author = False
806
807 # Find the greatest_rev in the source repo
808 svn_info = get_svn_info(source_url)
809 greatest_rev = svn_info['revision']
810
811 dup_wc = "_dup_wc"
812
813 # if old working copy does not exist, disable continue mode
814 # TODO: Better continue support. Maybe include source repo's rev # in target commit info?
815 if not os.path.exists(dup_wc):
816 options.cont_from_break = False
817
818 if not options.cont_from_break:
819 # Warn if the target SVN URL already exists
820 cmd = find_program("svn")
821 pipe = Popen([cmd] + ["list"] + [target_url], executable=cmd,
822 stdout=PIPE, stderr=PIPE)
823 out, err = pipe.communicate()
824 if pipe.returncode == 0:
825 print "Target SVN URL %s already exists!" % target_url
826 if out:
827 print out
828 print "Press 'Enter' to Continue, 'Ctrl + C' to Cancel..."
829 print "(Timeout in 5 seconds)"
830 rfds, wfds, efds = select.select([sys.stdin], [], [], 5)
831
832 # Get log entry for the SVN revision we will check out
833 if options.svn_rev:
834 # If a rev was specified, get the log entry just before or at that rev
835 svn_start_log = get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
836 else:
837 # Otherwise, get log entry of branch creation
838 # TODO: This call is *very* expensive on a repo with lots of revisions.
839 # Even though the call is passing --limit 1, it seems like that limit-filter
840 # is happening after SVN has fetched the full log history.
841 svn_start_log = get_first_svn_log_entry(source_url, 1, greatest_rev, False)
842
843 # This is the revision we will start from for source_url
844 svn_rev = svn_start_log['revision']
845
846 # Check out a working copy of target_url
847 dup_wc = os.path.abspath(dup_wc)
848 if os.path.exists(dup_wc):
849 shutil.rmtree(dup_wc)
850 svn_checkout(target_url, dup_wc)
851 os.chdir(dup_wc)
852
853 # For the initial commit to the target URL, export all the contents from
854 # the source URL at the start-revision.
855 paths = run_svn(["list", "-r", str(svn_rev), source_url+"@"+str(svn_rev)])
856 paths = paths.strip("\n").split("\n")
857 for path in paths:
858 if not path:
859 # Skip null lines
860 break
861 # Directories have a trailing slash in the "svn list" output
862 if path[-1] == "/":
863 path=path.rstrip('/')
864 if not os.path.exists(path):
865 os.makedirs(path)
866 run_svn(["export", "--force", "-r" , str(svn_rev), source_url+"/"+path+"@"+str(svn_rev), path])
867 run_svn(["add", path])
868 commit_from_svn_log_entry(svn_start_log, [], keep_author)
869 else:
870 dup_wc = os.path.abspath(dup_wc)
871 os.chdir(dup_wc)
872 # TODO: Need better resume support. For the time being, expect the caller to explicitly pass in the resume revision.
873 svn_rev = options.svn_rev
874 if svn_rev < 1:
875 display_error("Invalid arguments\n\nNeed to pass result rev # (-r) when using continue-mode (-c)", False)
876
877
878 # Get SVN info
879 svn_info = get_svn_info(source_url)
880 # Get the base URL for the source repos, e.g. u'svn://svn.example.com/svn/repo'
881 source_repos_url = svn_info['repos_url']
882
883 # Load SVN log starting from svn_rev + 1
884 it_log_entries = iter_svn_log_entries(source_url, svn_rev + 1, greatest_rev)
885
886 try:
887 for log_entry in it_log_entries:
888 # Replay this revision from source_url into target_url
889 pull_svn_rev(log_entry, source_repos_url, source_url, target_url, keep_author)
890 # Update our target working-copy, to ensure everything says it's at the new HEAD revision
891 run_svn(["up", dup_wc])
892
893 except KeyboardInterrupt:
894 print "\nStopped by user."
895 run_svn(["cleanup"])
896 run_svn(["revert", "--recursive", "."])
897 except:
898 print "\nCommand failed with following error:\n"
899 traceback.print_exc()
900 run_svn(["cleanup"])
901 run_svn(["revert", "--recursive", "."])
902 finally:
903 run_svn(["up"])
904 print "\nFinished!"
905
906
907 if __name__ == "__main__":
908 main()
909
910 # vim:sts=4:sw=4: