svn2svn.py

   1 #!/usr/bin/env python
   2 """
   3 svn2svn.py
   4
   5 Replicate (replay) changesets from one SVN repository to another:
   6 * Maintains full logical history (e.g. uses "svn copy" for renames).
   7 * Maintains original commit messages.
   8 * Cannot maintain original commit date, but appends original commit date
   9   for each commit message: "Date: %d".
  10 * Optionally maintain source author info. (Only supported if accessing
  11   target SVN repo via file://)
  12 * Optionally run an external shell script before each replayed commit
  13   to give the ability to dynamically exclude or modify files as part
  14   of the replay.
  15
  16 License: GPLv2, the same as hgsvn.
  17 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
  18 (This is a forked and modified version of http://code.google.com/p/svn2svn/)
  19 """
  20
  21 import os
  22 import sys
  23 import time
  24 import locale
  25 import shutil
  26 import select
  27 import calendar
  28 import traceback
  29
  30 from optparse import OptionParser
  31 from subprocess import Popen, PIPE
  32 from datetime import datetime
  33 from operator import itemgetter
  34
  35 try:
  36     from xml.etree import cElementTree as ET
  37 except ImportError:
  38     try:
  39         from xml.etree import ElementTree as ET
  40     except ImportError:
  41         try:
  42             import cElementTree as ET
  43         except ImportError:
  44             from elementtree import ElementTree as ET
  45
# Base argument lists for the "svn" subcommands run by this script.
# Callers append their own arguments to these before passing them to run_svn().
svn_log_args = ['log', '--xml']
svn_info_args = ['info', '--xml']
svn_checkout_args = ['checkout', '-q']
svn_status_args = ['status', '--xml', '-v', '--ignore-externals']

# Setup debug options
debug = False
debug_runsvn_timing = False    # Display how long each "svn" OS command took to run?
# Setup verbosity options
runsvn_showcmd = False    # Display every "svn" OS command we run?
runsvn_showout = False    # Display the stdout results from every  "svn" OS command we run?
svnlog_verbose = True     # Display each action + changed-path as we walk the history?
  58
  59 # define exception class
  60 class ExternalCommandFailed(RuntimeError):
  61     """
  62     An external command failed.
  63     """
  64
  65 def display_error(message, raise_exception = True):
  66     """
  67     Display error message, then terminate.
  68     """
  69     print "Error:", message
  70     print
  71     if raise_exception:
  72         raise ExternalCommandFailed
  73     else:
  74         sys.exit(1)
  75
  76 # Windows compatibility code by Bill Baxter
  77 if os.name == "nt":
  78     def find_program(name):
  79         """
  80         Find the name of the program for Popen.
  81         Windows is finnicky about having the complete file name. Popen
  82         won't search the %PATH% for you automatically.
  83         (Adapted from ctypes.find_library)
  84         """
  85         # See MSDN for the REAL search order.
  86         base, ext = os.path.splitext(name)
  87         if ext:
  88             exts = [ext]
  89         else:
  90             exts = ['.bat', '.exe']
  91         for directory in os.environ['PATH'].split(os.pathsep):
  92             for e in exts:
  93                 fname = os.path.join(directory, base + e)
  94                 if os.path.exists(fname):
  95                     return fname
  96         return None
  97 else:
  98     def find_program(name):
  99         """
 100         Find the name of the program for Popen.
 101         On Unix, popen isn't picky about having absolute paths.
 102         """
 103         return name
 104
 105 def shell_quote(s):
 106     if os.name == "nt":
 107         q = '"'
 108     else:
 109         q = "'"
 110     return q + s.replace('\\', '\\\\').replace("'", "'\"'\"'") + q
 111
# Preferred encoding as reported by the locale module; run_svn() falls back
# to it when it is called with an empty 'encoding'.
locale_encoding = locale.getpreferredencoding()
 113
 114 def run_svn(args, fail_if_stderr=False, encoding="utf-8"):
 115     """
 116     Run svn cmd in PIPE
 117     exit if svn cmd failed
 118     """
 119     def _transform_arg(a):
 120         if isinstance(a, unicode):
 121             a = a.encode(encoding or locale_encoding)
 122         elif not isinstance(a, str):
 123             a = str(a)
 124         return a
 125     t_args = map(_transform_arg, args)
 126
 127     cmd = find_program("svn")
 128     cmd_string = str(" ".join(map(shell_quote, [cmd] + t_args)))
 129     if runsvn_showcmd:
 130         print "$", "("+os.getcwd()+")", cmd_string
 131     if debug_runsvn_timing:
 132         time1 = time.time()
 133     pipe = Popen([cmd] + t_args, executable=cmd, stdout=PIPE, stderr=PIPE)
 134     out, err = pipe.communicate()
 135     if debug_runsvn_timing:
 136         time2 = time.time()
 137         print "(" + str(round(time2-time1,4)) + " elapsed)"
 138     if out and runsvn_showout:
 139         print out
 140     if pipe.returncode != 0 or (fail_if_stderr and err.strip()):
 141         display_error("External program failed (return code %d): %s\n%s"
 142             % (pipe.returncode, cmd_string, err))
 143     return out
 144
 145 def svn_date_to_timestamp(svn_date):
 146     """
 147     Parse an SVN date as read from the XML output and
 148     return the corresponding timestamp.
 149     """
 150     # Strip microseconds and timezone (always UTC, hopefully)
 151     # XXX there are various ISO datetime parsing routines out there,
 152     # cf. http://seehuhn.de/comp/pdate
 153     date = svn_date.split('.', 2)[0]
 154     time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
 155     return calendar.timegm(time_tuple)
 156
 157 def parse_svn_info_xml(xml_string):
 158     """
 159     Parse the XML output from an "svn info" command and extract
 160     useful information as a dict.
 161     """
 162     d = {}
 163     tree = ET.fromstring(xml_string)
 164     entry = tree.find('.//entry')
 165     if entry:
 166         d['url'] = entry.find('url').text
 167         d['revision'] = int(entry.get('revision'))
 168         d['repos_url'] = tree.find('.//repository/root').text
 169         d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
 170         d['kind'] = entry.get('kind')
 171     return d
 172
 173 def parse_svn_log_xml(xml_string):
 174     """
 175     Parse the XML output from an "svn log" command and extract
 176     useful information as a list of dicts (one per log changeset).
 177     """
 178     l = []
 179     tree = ET.fromstring(xml_string)
 180     for entry in tree.findall('logentry'):
 181         d = {}
 182         d['revision'] = int(entry.get('revision'))
 183         # Some revisions don't have authors, most notably
 184         # the first revision in a repository.
 185         author = entry.find('author')
 186         d['author'] = author is not None and author.text or None
 187         d['date'] = svn_date_to_timestamp(entry.find('date').text)
 188         # Some revisions may have empty commit message
 189         message = entry.find('msg')
 190         message = message is not None and message.text is not None \
 191                         and message.text.strip() or ""
 192         # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
 193         d['message'] = message.replace('\r\n', '\n').replace('\n\r', '\n'). \
 194                                replace('\r', '\n')
 195         paths = []
 196         for path in entry.findall('.//path'):
 197             copyfrom_rev = path.get('copyfrom-rev')
 198             if copyfrom_rev:
 199                 copyfrom_rev = int(copyfrom_rev)
 200             paths.append({
 201                 'path': path.text,
 202                 'kind': path.get('kind'),
 203                 'action': path.get('action'),
 204                 'copyfrom_path': path.get('copyfrom-path'),
 205                 'copyfrom_revision': copyfrom_rev,
 206             })
 207         # Need to sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
 208         # can process actions in depth-first order.
 209         d['changed_paths'] = sorted(paths, key=itemgetter('path'))
 210         l.append(d)
 211     return l
 212
 213 def parse_svn_status_xml(xml_string, base_dir=None):
 214     """
 215     Parse the XML output from an "svn status" command and extract
 216     useful info as a list of dicts (one per status entry).
 217     """
 218     l = []
 219     tree = ET.fromstring(xml_string)
 220     for entry in tree.findall('.//entry'):
 221         d = {}
 222         path = entry.get('path')
 223         if base_dir is not None:
 224             assert path.startswith(base_dir)
 225             path = path[len(base_dir):].lstrip('/\\')
 226         d['path'] = path
 227         wc_status = entry.find('wc-status')
 228         if wc_status.get('item') == 'external':
 229             d['type'] = 'external'
 230         elif wc_status.get('revision') is not None:
 231             d['type'] = 'normal'
 232         else:
 233             d['type'] = 'unversioned'
 234         l.append(d)
 235     return l
 236
 237 def get_svn_info(svn_url_or_wc, rev_number=None):
 238     """
 239     Get SVN information for the given URL or working copy,
 240     with an optionally specified revision number.
 241     Returns a dict as created by parse_svn_info_xml().
 242     """
 243     if rev_number is not None:
 244         args = [svn_url_or_wc + "@" + str(rev_number)]
 245     else:
 246         args = [svn_url_or_wc]
 247     xml_string = run_svn(svn_info_args + args, fail_if_stderr=True)
 248     return parse_svn_info_xml(xml_string)
 249
 250 def svn_checkout(svn_url, checkout_dir, rev_number=None):
 251     """
 252     Checkout the given URL at an optional revision number.
 253     """
 254     args = []
 255     if rev_number is not None:
 256         args += ['-r', rev_number]
 257     args += [svn_url, checkout_dir]
 258     return run_svn(svn_checkout_args + args)
 259
 260 def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True):
 261     """
 262     Fetch up to 'limit' SVN log entries between the given revisions.
 263     """
 264     if stop_on_copy:
 265         args = ['--stop-on-copy']
 266     else:
 267         args = []
 268     url = str(svn_url_or_wc)
 269     if rev_start != 'HEAD' and rev_end != 'HEAD':
 270         args += ['-r', '%s:%s' % (rev_start, rev_end)]
 271         if not "@" in svn_url_or_wc:
 272             url += "@" + str(max(rev_start, rev_end))
 273     if get_changed_paths:
 274         args += ['-v']
 275     args += ['--limit', str(limit), url]
 276     xml_string = run_svn(svn_log_args + args)
 277     return parse_svn_log_xml(xml_string)
 278
 279 def get_svn_status(svn_wc, flags=None):
 280     """
 281     Get SVN status information about the given working copy.
 282     """
 283     # Ensure proper stripping by canonicalizing the path
 284     svn_wc = os.path.abspath(svn_wc)
 285     args = []
 286     if flags:
 287         args += [flags]
 288     args += [svn_wc]
 289     xml_string = run_svn(svn_status_args + args)
 290     return parse_svn_status_xml(xml_string, svn_wc)
 291
 292 def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True):
 293     """
 294     Get the first SVN log entry in the requested revision range.
 295     """
 296     entries = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths)
 297     if not entries:
 298         display_error("No SVN log for %s between revisions %s and %s" %
 299                       (svn_url, rev_start, rev_end))
 300
 301     return entries[0]
 302
 303 def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
 304     """
 305     Get the first log entry after/at the given revision number in an SVN branch.
 306     By default the revision number is set to 0, which will give you the log
 307     entry corresponding to the branch creaction.
 308
 309     NOTE: to know whether the branch creation corresponds to an SVN import or
 310     a copy from another branch, inspect elements of the 'changed_paths' entry
 311     in the returned dictionary.
 312     """
 313     return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True, get_changed_paths=True)
 314
 315 def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
 316     """
 317     Get the last log entry before/at the given revision number in an SVN branch.
 318     By default the revision number is set to HEAD, which will give you the log
 319     entry corresponding to the latest commit in branch.
 320     """
 321     return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True, get_changed_paths=True)
 322
 323
# Tuning values for the chunked fetching in iter_svn_log_entries():
# the request duration (in seconds) used to grow or shrink the chunk size,
# and the smallest chunk size (number of log entries) it will request.
log_duration_threshold = 10.0
log_min_chunk_length = 10
 326
 327 def iter_svn_log_entries(svn_url, first_rev, last_rev):
 328     """
 329     Iterate over SVN log entries between first_rev and last_rev.
 330
 331     This function features chunked log fetching so that it isn't too nasty
 332     to the SVN server if many entries are requested.
 333     """
 334     cur_rev = first_rev
 335     chunk_length = log_min_chunk_length
 336     chunk_interval_factor = 1.0
 337     while last_rev == "HEAD" or cur_rev <= last_rev:
 338         start_t = time.time()
 339         stop_rev = min(last_rev, cur_rev + int(chunk_length * chunk_interval_factor))
 340         entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length)
 341         duration = time.time() - start_t
 342         if not entries:
 343             if stop_rev == last_rev:
 344                 break
 345             cur_rev = stop_rev + 1
 346             chunk_interval_factor *= 2.0
 347             continue
 348         for e in entries:
 349             yield e
 350         cur_rev = e['revision'] + 1
 351         # Adapt chunk length based on measured request duration
 352         if duration < log_duration_threshold:
 353             chunk_length = int(chunk_length * 2.0)
 354         elif duration > log_duration_threshold * 2:
 355             chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
 356
 357 def commit_from_svn_log_entry(entry, files=None, keep_author=False):
 358     """
 359     Given an SVN log entry and an optional sequence of files, do an svn commit.
 360     """
 361     # TODO: Run optional external shell hook here, for doing pre-commit filtering
 362     # This will use the local timezone for displaying commit times
 363     timestamp = int(entry['date'])
 364     svn_date = str(datetime.fromtimestamp(timestamp))
 365     # Uncomment this one one if you prefer UTC commit times
 366     #svn_date = "%d 0" % timestamp
 367     if keep_author:
 368         options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date, "--username", entry['author']]
 369     else:
 370         options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date + "\nAuthor: " + entry['author']]
 371     if files:
 372         options += list(files)
 373     print "(Committing source rev #"+str(entry['revision'])+"...)"
 374     run_svn(options)
 375
 376 def in_svn(p):
 377     """
 378     Check if a given file/folder is being tracked by Subversion.
 379     Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
 380     With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of the working-copy.
 381     Use "svn status" to check the status of the file/folder.
 382     """
 383     # TODO: Is there a better way to do this?
 384     entries = get_svn_status(p)
 385     if not entries:
 386       return False
 387     d = entries[0]
 388     return (d['type'] == 'normal')
 389
 390 def find_svn_ancestors(source_repos_url, source_base, source_offset, copyfrom_path, copyfrom_rev):
 391     """
 392     Given a copy-from path (copyfrom_path), walk the SVN history backwards to inspect
 393     the ancestory of that path. Build a collection of copyfrom_path+revision pairs
 394     for each of the branch-copies since the initial branch-creation.  If we find a
 395     copyfrom_path which source_base is a substring match of (e.g. we crawled back to
 396     the initial branch-copy from trunk), then return the collection of ancestor paths.
 397     Otherwise, copyfrom_path has no ancestory compared to source_base.
 398
 399     This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
 400     file/folder was renamed in a branch and then that branch was merged back to trunk.
 401
 402     PARAMETERS:
 403     * source_repos_url = Full URL to root of repository, e.g. 'file:///path/to/repos'
 404     * source_base = e.g. '/trunk'
 405     * source_offset = e.g. 'projectA/file1.txt'
 406     * copyfrom_path = e.g. '/branches/bug123/projectA/file1.txt'
 407     """
 408
 409     done = False
 410     working_path = copyfrom_path
 411     working_base = copyfrom_path[:-len(source_offset)].rstrip('/')
 412     working_offset = source_offset.strip('/')
 413     working_rev = copyfrom_rev
 414     ancestors = [{'path': [working_base, working_offset], 'revision': working_rev}]
 415     while not done:
 416         # Get the first "svn log" entry for this path (relative to @rev)
 417         #working_path = working_base + "/" + working_offset
 418         if debug:
 419             print ">> find_svn_ancestors: " + source_repos_url + working_path + "@" + str(working_rev) + \
 420                    "  (" + working_base + " " + working_offset + ")"
 421         log_entry = get_first_svn_log_entry(source_repos_url + working_path + "@" + str(working_rev), 1, str(working_rev), True)
 422         if not log_entry:
 423             done = True
 424         # Find the action for our working_path in this revision
 425         for d in log_entry['changed_paths']:
 426             path = d['path']
 427             if not path in working_path:
 428                 continue
 429             # Check action-type for this file
 430             action = d['action']
 431             if action not in 'MARD':
 432                 display_error("In SVN rev. %d: action '%s' not supported. \
 433                                Please report a bug!" % (log_entry['revision'], action))
 434             if debug:
 435                 debug_desc = ": " + action + " " + path
 436                 if d['copyfrom_path']:
 437                     debug_desc += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
 438                 print debug_desc
 439
 440             if action == 'R':
 441                 # If file/folder was replaced, it has no ancestor
 442                 return []
 443             if action == 'D':
 444                 # If file/folder was deleted, it has no ancestor
 445                 return []
 446             if action == 'A':
 447                 # If file/folder was added but not a copy, it has no ancestor
 448                 if not d['copyfrom_path']:
 449                     return []
 450                 # Else, file/folder was added and is a copy, so check ancestors
 451                 path_old = d['copyfrom_path']
 452                 working_path = working_path.replace(path, path_old)
 453                 if working_base in working_path:
 454                     # If the new and old working_path share the same working_base, just need to update working_offset.
 455                     working_offset = working_path[len(working_base)+1:]
 456                 else:
 457                     # Else, assume that working_base has changed but working_offset is the same, e.g. a re-branch.
 458                     # TODO: Is this a safe assumption?!
 459                     working_base = working_path[:-len(working_offset)].rstrip('/')
 460                 working_rev = d['copyfrom_revision']
 461                 if debug:
 462                     print ">> find_svn_ancestors: copy-from: " + working_base + " " + working_offset + "@" + str(working_rev)
 463                 ancestors.append({'path': [working_base, working_offset], 'revision': working_rev})
 464                 # If we found a copy-from case which matches our source_base, we're done
 465                 if (path_old == source_base) or (path_old.startswith(source_base + "/")):
 466                     return ancestors
 467                 # Else, follow the copy and keep on searching
 468                 break
 469     return None
 470
 471 def replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url):
 472     """
 473     Given an array of ancestor info (find_svn_ancestors), replay the history
 474     to correctly track renames ("svn copy/move") across branch-merges.
 475
 476     For example, consider a sequence of events like this:
 477     1. svn copy /trunk /branches/fix1
 478     2. (Make some changes on /branches/fix1)
 479     3. svn copy /branches/fix1/Proj1 /branches/fix1/Proj2  " Rename folder
 480     4. svn copy /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt  " Rename file inside renamed folder
 481     5. svn co /trunk && svn merge /branches/fix1
 482     After the merge and commit, "svn log -v" with show a delete of /trunk/Proj1
 483     and and add of /trunk/Proj2 comp-from /branches/fix1/Proj2. If we were just
 484     to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
 485     we'd lose the logical history that Proj2/file2.txt is really a descendant
 486     of Proj1/file1.txt.
 487
 488     'source_repos_url' is the full URL to the root of the source repository.
 489     'ancestors' is the array returned by find_svn_ancestors() with the final
 490       destination info appended to it by process_svn_log_entry().
 491     'dest_path'
 492     """
 493     # Ignore ancestors[0], which is the original (pre-branch-copy) trunk path
 494     # Ignore ancestors[1], which is the original branch-creation commit
 495     # Ignore ancestors[n], which is the final commit back to trunk
 496     for idx in range(1, len(ancestors)-1):
 497         ancestor = ancestors[idx]
 498         source_base = ancestor['path'][0]
 499         source_offset = ancestor['path'][1]
 500         source_path = source_base + "/" + source_offset
 501         source_rev = ancestor['revision']
 502         source_rev_next = ancestors[idx+1]['revision']
 503         # Do a "svn log" on the _parent_ directory of source_path, since trying to get log info
 504         # for the "old path" on the revision where the copy/move happened will fail.
 505         if "/" in source_path:
 506             p_source_path = source_path[:source_path.rindex('/')]
 507         else:
 508             p_source_path = ""
 509         if debug:
 510             print ">> replay_svn_ancestors: ["+str(idx)+"]" + source_path+"@"+str(source_rev) + "  ["+p_source_path+"@"+str(source_rev)+":"+str(source_rev_next-1)+"]"
 511         it_log_entries = iter_svn_log_entries(source_repos_url+p_source_path, source_rev, source_rev_next-1)
 512         for log_entry in it_log_entries:
 513             #print ">> replay_svn_ancestors: log_entry: (" + source_repos_url+source_base + ")"
 514             #print log_entry
 515             # TODO: Hit a problem case with a rename-situation where the "remove" was committed ahead of the "add (copy)".
 516             #       Do we maybe need to buffer all the remove's until the end of the entire replay session?
 517             #       Or can we maybe work around this by passing an explicit rev # into "svn copy"?
 518             process_svn_log_entry(log_entry, source_repos_url, source_repos_url+source_base, target_url)
 519
 520 def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url):
 521     """
 522     Process SVN changes from the given log entry.
 523     Returns array of all the paths in the working-copy that were changed,
 524     i.e. the paths which need to be "svn commit".
 525
 526     'log_entry' is the array structure built by parse_svn_log_xml().
 527     'source_repos_url' is the full URL to the root of the source repository.
 528     'source_url' is the full URL to the source path in the source repository.
 529     'target_url' is the full URL to the target path in the target repository.
 530     """
 531     # Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
 532     source_base = source_url[len(source_repos_url):]
 533     if debug:
 534         print ">> process_svn_log_entry: " + source_url + " (" + source_base + ")"
 535
 536     svn_rev = log_entry['revision']
 537     # Get current target revision, for "svn copy" support
 538     dup_info = get_svn_info(target_url)
 539     dup_rev = dup_info['revision']
 540
 541     removed_paths = []
 542     unrelated_paths = []
 543     commit_paths = []
 544
 545     for d in log_entry['changed_paths']:
 546         # Get the full path for this changed_path
 547         # e.g. u'/branches/bug123/projectA/file1.txt'
 548         path = d['path']
 549         if not path.startswith(source_base + "/"):
 550             # Ignore changed files that are not part of this subdir
 551             if path != source_base:
 552                 print ">> process_svn_log_entry: Unrelated path: " + path + "  (" + source_base + ")"
 553                 unrelated_paths.append(path)
 554             continue
 555         # Calculate the offset (based on source_base) for this changed_path
 556         # e.g. u'projectA/file1.txt'
 557         # (path = source_base + "/" + path_offset)
 558         path_offset = path[len(source_base):].strip("/")
 559         # Get the action for this path
 560         action = d['action']
 561         if action not in 'MARD':
 562             display_error("In SVN rev. %d: action '%s' not supported. \
 563                            Please report a bug!" % (svn_rev, action))
 564
 565         # Try to be efficient and keep track of an explicit list of paths in the
 566         # working copy that changed. If we commit from the root of the working copy,
 567         # then SVN needs to crawl the entire working copy looking for pending changes.
 568         # But, if we gather too many paths to commit, then we wipe commit_paths below
 569         # and end-up doing a commit at the root of the working-copy.
 570         if len (commit_paths) < 100:
 571             commit_paths.append(path_offset)
 572
 573         # Special-handling for replace's
 574         is_replace = False
 575         if action == 'R':
 576             if svnlog_verbose:
 577                 msg = " " + action + " " + d['path']
 578                 if d['copyfrom_path']:
 579                     msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
 580                 print msg
 581             # If file was "replaced" (deleted then re-added, all in same revision),
 582             # then we need to run the "svn rm" first, then change action='A'. This
 583             # lets the normal code below handle re-"svn add"'ing the files. This
 584             # should replicate the "replace".
 585             run_svn(["up", path_offset])
 586             run_svn(["remove", "--force", path_offset])
 587             action = 'A'
 588             is_replace = True
 589
 590         # Handle all the various action-types
 591         # (Handle "add" first, for "svn copy/move" support)
 592         if action == 'A':
 593             if svnlog_verbose:
 594                 msg = " " + action + " " + d['path']
 595                 if d['copyfrom_path']:
 596                     msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
 597                 print msg
 598             # Determine where to export from
 599             copyfrom_rev = svn_rev
 600             copyfrom_path = path
 601             svn_copy = False
 602             # Handle cases where this "add" was a copy from another URL in the source repos
 603             if d['copyfrom_revision']:
 604                 copyfrom_rev = d['copyfrom_revision']
 605                 copyfrom_path = d['copyfrom_path']
 606                 if debug:
 607                     print ">> process_svn_log_entry: copy-to: " + source_base + " " + path_offset
 608                 if source_base in copyfrom_path:
 609                     # If the copy-from path is inside the current working-copy, no need to check ancestry.
 610                     ancestors = []
 611                     copyfrom_path = copyfrom_path[len(source_base):].strip("/")
 612                     if debug:
 613                         print ">> process_svn_log_entry: Found copy: " + copyfrom_path+"@"+str(copyfrom_rev)
 614                     svn_copy = True
 615                 else:
 616                     ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset,
 617                                                    copyfrom_path, copyfrom_rev)
 618                 if ancestors:
 619                     # Reverse the list, so that we loop in chronological order
 620                     ancestors.reverse()
 621                     # Append the current revision
 622                     ancestors.append({'path': [source_base, path_offset], 'revision': svn_rev})
 623                     # ancestors[0] is the original (pre-branch-copy) trunk path.
 624                     # ancestors[1] is the first commit on the new branch.
 625                     copyfrom_rev =  ancestors[0]['revision']
 626                     copyfrom_base = ancestors[0]['path'][0]
 627                     copyfrom_offset = ancestors[0]['path'][1]
 628                     copyfrom_path = copyfrom_base + copyfrom_offset
 629                     if debug:
 630                         print ">> process_svn_log_entry: FOUND PARENT:"
 631                         for idx in range(0,len(ancestors)):
 632                             ancestor = ancestors[idx]
 633                             print "     ["+str(idx)+"] " + ancestor['path'][0]+" "+ancestor['path'][1]+"@"+str(ancestor['revision'])
 634                     #print ">> process_svn_log_entry: copyfrom_path (before): " + copyfrom_path + " source_base: " + source_base + " p: " + p
 635                     copyfrom_path = copyfrom_path[len(source_base):].strip("/")
 636                     #print ">> process_svn_log_entry: copyfrom_path (after): " + copyfrom_path
 637                     svn_copy = True
 638             # If this add was a copy-from, do a smart replay of the ancestors' history.
 639             if svn_copy:
 640                 if debug:
 641                     print ">> process_svn_log_entry: svn_copy: copy-from: " + copyfrom_path+"@"+str(copyfrom_rev) + "  source_base: "+source_base + "  len(ancestors): " + str(len(ancestors))
 642                 # If we don't have any ancestors, then this is just a straight "svn copy" in the current working-copy.
 643                 if not ancestors:
 644                     # ...but not if the target is already tracked, because this might run several times for the same path.
 645                     # TODO: Is there a better way to avoid recusion bugs? Maybe a collection of processed paths?
 646                     if not in_svn(path_offset):
 647                         if os.path.exists(copyfrom_path):
 648                             # If the copyfrom_path exists in the working-copy, do a local copy
 649                             run_svn(["copy", copyfrom_path, path_offset])
 650                         else:
 651                             run_svn(["copy", "-r", dup_rev, target_url+"/"+copyfrom_path+"@"+str(dup_rev), path_offset])
 652                 else:
 653                     if d['kind'] == 'dir':
 654                         # Replay any actions which happened to this folder from the ancestor path(s).
 655                         replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url)
 656                     else:
 657                         # Just do a straight "svn copy" for files. There isn't any kind of "dependent"
 658                         # history we might need to replay like for folders.
 659                         # TODO: Is this logic really correct? Doing a WC vs URL "svn copy" based on existence
 660                         #       of *source* location seems a bit kludgy. Should there be a running list of
 661                         #       renames during replay_svn_ancestors >> process_svn_log_entry?
 662                         if os.path.exists(copyfrom_path):
 663                             # If the copyfrom_path exists in the working-copy, do a local copy
 664                             run_svn(["copy", copyfrom_path, path_offset])
 665                         else:
 666                             # Else, could be a situation where replay_svn_ancestors() is replaying branch
 667                             # history and a copy was committed across two revisions: first the deletion
 668                             # followed by the later add. In such a case, we need to copy from HEAD (dup_rev)
 669                             # of the path in *target_url*
 670                             run_svn(["copy", "-r", dup_rev, target_url+"/"+copyfrom_path+"@"+str(dup_rev), path_offset])
 671             # Else just copy/export the files from the source repo and "svn add" them.
 672             else:
 673                 # Create (parent) directory if needed
 674                 if d['kind'] == 'dir':
 675                     p_path = path_offset
 676                 else:
 677                     p_path = os.path.dirname(path_offset).strip() or '.'
 678                 if not os.path.exists(p_path):
 679                     os.makedirs(p_path)
 680                 # Export the entire added tree.
 681                 run_svn(["export", "--force", "-r", str(copyfrom_rev),
 682                          source_repos_url + copyfrom_path + "@" + str(copyfrom_rev), path_offset])
 683                 # TODO: The "no in_svn" condition here is wrong for replace cases.
 684                 #       Added the in_svn condition here originally since "svn export" is recursive
 685                 #       but "svn log" will have an entry for each indiv file, hence we run into a
 686                 #       cannot-re-add-file-which-is-already-added issue.
 687                 if (not in_svn(path_offset)) or (is_replace):
 688                     run_svn(["add", "--parents", path_offset])
 689                 # TODO: Need to copy SVN properties from source repos
 690
 691         elif action == 'D':
 692             # Queue "svn remove" commands, to allow the action == 'A' handling the opportunity
 693             # to do smart "svn copy" handling on copy/move/renames.
 694             removed_paths.append(path_offset)
 695
 696         elif action == 'M':
 697             if svnlog_verbose:
 698                 print " " + action + " " + d['path']
 699             out = run_svn(["merge", "-c", str(svn_rev), "--non-recursive",
 700                      "--non-interactive", "--accept=theirs-full",
 701                      source_url+"/"+path_offset+"@"+str(svn_rev), path_offset])
 702
 703         else:
 704             display_error("Internal Error: pull_svn_rev: Unhandled 'action' value: '" + action + "'")
 705
 706     if removed_paths:
 707         for path_offset in removed_paths:
 708             if svnlog_verbose:
 709                 print " D " + source_url+"/"+path_offset
 710             run_svn(["remove", "--force", path_offset])
 711
 712     if unrelated_paths:
 713         print "Unrelated paths: (vs. '" + source_base + "')"
 714         print "*", unrelated_paths
 715
 716     return commit_paths
 717
 718 def pull_svn_rev(log_entry, source_repos_url, source_url, target_url, keep_author=False):
 719     """
 720     Pull SVN changes from the given log entry.
 721     Returns the new SVN revision.
 722     If an exception occurs, it will rollback to revision 'svn_rev - 1'.
 723     """
 724     ## Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
 725     #source_base = source_url[len(source_repos_url):]
 726
 727     svn_rev = log_entry['revision']
 728     print "\n(Starting source rev #"+str(svn_rev)+":)"
 729     print "r"+str(log_entry['revision']) + " | " + \
 730           log_entry['author'] + " | " + \
 731           str(datetime.fromtimestamp(int(log_entry['date'])).isoformat(' '))
 732     print log_entry['message']
 733     print "------------------------------------------------------------------------"
 734     commit_paths = process_svn_log_entry(log_entry, source_repos_url, source_url, target_url)
 735
 736     # If we had too many individual paths to commit, wipe the list and just commit at
 737     # the root of the working copy.
 738     if len (commit_paths) > 99:
 739         commit_paths = []
 740
 741     # TODO: Use SVN properties to track source URL + rev in the target repo?
 742     #       This would provide a more reliable resume-support
 743     try:
 744         commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
 745     except ExternalCommandFailed:
 746         # try to ignore the Properties conflicts on files and dirs
 747         # use the copy from original_wc
 748         # TODO: Need to re-work this?
 749         #has_Conflict = False
 750         #for d in log_entry['changed_paths']:
 751         #    p = d['path']
 752         #    p = p[len(source_base):].strip("/")
 753         #    if os.path.isfile(p):
 754         #        if os.path.isfile(p + ".prej"):
 755         #            has_Conflict = True
 756         #            shutil.copy(original_wc + os.sep + p, p)
 757         #            p2=os.sep + p.replace('_', '__').replace('/', '_') \
 758         #                      + ".prej-" + str(svn_rev)
 759         #            shutil.move(p + ".prej", os.path.dirname(original_wc) + p2)
 760         #            w="\n### Properties conflicts ignored:"
 761         #            print "%s %s, in revision: %s\n" % (w, p, svn_rev)
 762         #    elif os.path.isdir(p):
 763         #        if os.path.isfile(p + os.sep + "dir_conflicts.prej"):
 764         #            has_Conflict = True
 765         #            p2=os.sep + p.replace('_', '__').replace('/', '_') \
 766         #                      + "_dir__conflicts.prej-" + str(svn_rev)
 767         #            shutil.move(p + os.sep + "dir_conflicts.prej",
 768         #                        os.path.dirname(original_wc) + p2)
 769         #            w="\n### Properties conflicts ignored:"
 770         #            print "%s %s, in revision: %s\n" % (w, p, svn_rev)
 771         #            out = run_svn(["propget", "svn:ignore",
 772         #                           original_wc + os.sep + p])
 773         #            if out:
 774         #                run_svn(["propset", "svn:ignore", out.strip(), p])
 775         #            out = run_svn(["propget", "svn:externel",
 776         #                           original_wc + os.sep + p])
 777         #            if out:
 778         #                run_svn(["propset", "svn:external", out.strip(), p])
 779         ## try again
 780         #if has_Conflict:
 781         #    commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
 782         #else:
 783             raise ExternalCommandFailed
 784     print "(Finished source rev #"+str(svn_rev)+")"
 785
 786
 787 def main():
 788     usage = "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
 789     parser = OptionParser(usage)
 790     parser.add_option("-a", "--keep-author", action="store_true",
 791                       dest="keep_author", help="Keep revision Author or not")
 792     parser.add_option("-c", "--continue-from-break", action="store_true",
 793                       dest="cont_from_break",
 794                       help="Continue from previous break")
 795     parser.add_option("-r", "--svn-rev", type="int", dest="svn_rev",
 796                       help="SVN revision to checkout from")
 797     (options, args) = parser.parse_args()
 798     if len(args) != 2:
 799         display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
 800                       False)
 801
 802     source_url = args.pop(0).rstrip("/")
 803     target_url = args.pop(0).rstrip("/")
 804     if options.keep_author:
 805         keep_author = True
 806     else:
 807         keep_author = False
 808
 809     # Find the greatest_rev in the source repo
 810     svn_info = get_svn_info(source_url)
 811     greatest_rev = svn_info['revision']
 812
 813     dup_wc = "_dup_wc"
 814
 815     # if old working copy does not exist, disable continue mode
 816     # TODO: Better continue support. Maybe include source repo's rev # in target commit info?
 817     if not os.path.exists(dup_wc):
 818         options.cont_from_break = False
 819
 820     if not options.cont_from_break:
 821         # Warn if Target SVN URL existed
 822         cmd = find_program("svn")
 823         pipe = Popen([cmd] + ["list"] + [target_url], executable=cmd,
 824                      stdout=PIPE, stderr=PIPE)
 825         out, err = pipe.communicate()
 826         if pipe.returncode == 0:
 827             print "Target SVN URL: %s existed!" % target_url
 828             if out:
 829                 print out
 830             print "Press 'Enter' to Continue, 'Ctrl + C' to Cancel..."
 831             print "(Timeout in 5 seconds)"
 832             rfds, wfds, efds = select.select([sys.stdin], [], [], 5)
 833
 834         # Get log entry for the SVN revision we will check out
 835         if options.svn_rev:
 836             # If specify a rev, get log entry just before or at rev
 837             svn_start_log = get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
 838         else:
 839             # Otherwise, get log entry of branch creation
 840             # TODO: This call is *very* expensive on a repo with lots of revisions.
 841             #       Even though the call is passing --limit 1, it seems like that limit-filter
 842             #       is happening after SVN has fetched the full log history.
 843             svn_start_log = get_first_svn_log_entry(source_url, 1, greatest_rev, False)
 844
 845         # This is the revision we will start from for source_url
 846         svn_rev = svn_start_log['revision']
 847
 848         # Check out a working copy of target_url
 849         dup_wc = os.path.abspath(dup_wc)
 850         if os.path.exists(dup_wc):
 851             shutil.rmtree(dup_wc)
 852         svn_checkout(target_url, dup_wc)
 853         os.chdir(dup_wc)
 854
 855         # For the initial commit to the target URL, export all the contents from
 856         # the source URL at the start-revision.
 857         paths = run_svn(["list", "-r", str(svn_rev), source_url+"@"+str(svn_rev)])
 858         paths = paths.strip("\n").split("\n")
 859         for path in paths:
 860             if not path:
 861                 # Skip null lines
 862                 break
 863             # Directories have a trailing slash in the "svn list" output
 864             if path[-1] == "/":
 865                 path=path.rstrip('/')
 866                 if not os.path.exists(path):
 867                     os.makedirs(path)
 868             run_svn(["export", "--force", "-r" , str(svn_rev), source_url+"/"+path+"@"+str(svn_rev), path])
 869             run_svn(["add", path])
 870         commit_from_svn_log_entry(svn_start_log, [], keep_author)
 871     else:
 872         dup_wc = os.path.abspath(dup_wc)
 873         os.chdir(dup_wc)
 874         # TODO: Need better resume support. For the time being, expect caller explictly passes in resume revision.
 875         svn_rev = options.svn_rev
 876         if svn_rev < 1:
 877             display_error("Invalid arguments\n\nNeed to pass result rev # (-r) when using continue-mode (-c)", False)
 878
 879
 880     # Get SVN info
 881     svn_info = get_svn_info(source_url)
 882     # Get the base URL for the source repos, e.g. u'svn://svn.example.com/svn/repo'
 883     source_repos_url = svn_info['repos_url']
 884
 885     # Load SVN log starting from svn_rev + 1
 886     it_log_entries = iter_svn_log_entries(source_url, svn_rev + 1, greatest_rev)
 887
 888     try:
 889         for log_entry in it_log_entries:
 890             # Replay this revision from source_url into target_url
 891             pull_svn_rev(log_entry, source_repos_url, source_url, target_url, keep_author)
 892             # Update our target working-copy, to ensure everything says it's at the new HEAD revision
 893             run_svn(["up", dup_wc])
 894
 895     except KeyboardInterrupt:
 896         print "\nStopped by user."
 897         run_svn(["cleanup"])
 898         run_svn(["revert", "--recursive", "."])
 899     except:
 900         print "\nCommand failed with following error:\n"
 901         traceback.print_exc()
 902         run_svn(["cleanup"])
 903         run_svn(["revert", "--recursive", "."])
 904     finally:
 905         run_svn(["up"])
 906         print "\nFinished!"
 907
 908
# Run the replay only when this file is executed as a script, so that
# importing it as a module has no side effects.
if __name__ == "__main__":
    main()
 911
 912 # vim:sts=4:sw=4: