1 #!/usr/bin/env python
2 """
3 svn2svn.py
4
5 Replicate (replay) changesets from one SVN repository to another:
6 * Maintains full logical history (e.g. uses "svn copy" for renames).
7 * Maintains original commit messages.
8 * Cannot maintain original commit dates, but appends the original commit date
9 to each commit message: "Date: %d".
10 * Optionally maintains source author info. (Only supported if accessing
11 target SVN repo via file://)
12 * Optionally runs an external shell script before each replayed commit
13 to give the ability to dynamically exclude or modify files as part
14 of the replay.
15
16 License: GPLv2, the same as hgsvn.
17 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
18 (This is a forked and modified version of http://code.google.com/p/svn2svn/)
19 """
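# Example invocation (a minimal sketch with hypothetical repository URLs; see
# main() below for the actual option handling):
#
#   svn2svn.py -a file:///path/to/source-repo/trunk file:///path/to/target-repo/trunk
#
# This would replay every revision under the source trunk into the target URL,
# keeping the original author on each commit (-a).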
20
21 import os
22 import sys
23 import time
24 import locale
25 import shutil
26 import select
27 import calendar
28 import traceback
29
30 from optparse import OptionParser
31 from subprocess import Popen, PIPE
32 from datetime import datetime
33 from operator import itemgetter
34
35 try:
36 from xml.etree import cElementTree as ET
37 except ImportError:
38 try:
39 from xml.etree import ElementTree as ET
40 except ImportError:
41 try:
42 import cElementTree as ET
43 except ImportError:
44 from elementtree import ElementTree as ET
45
46 svn_log_args = ['log', '--xml']
47 svn_info_args = ['info', '--xml']
48 svn_checkout_args = ['checkout', '-q']
49 svn_status_args = ['status', '--xml', '-v', '--ignore-externals']
50
51 # Setup debug options
52 debug = False
53 debug_runsvn_timing = False # Display how long each "svn" OS command took to run?
54 # Setup verbosity options
55 runsvn_showcmd = False # Display every "svn" OS command we run?
56 runsvn_showout = False # Display the stdout results from every "svn" OS command we run?
57 svnlog_verbose = True # Display each action + changed-path as we walk the history?
58
59 # define exception class
60 class ExternalCommandFailed(RuntimeError):
61 """
62 An external command failed.
63 """
64
65 def display_error(message, raise_exception = True):
66 """
67 Display error message, then terminate.
68 """
69 print "Error:", message
70 print
71 if raise_exception:
72 raise ExternalCommandFailed
73 else:
74 sys.exit(1)
75
76 # Windows compatibility code by Bill Baxter
77 if os.name == "nt":
78 def find_program(name):
79 """
80 Find the name of the program for Popen.
81 Windows is finicky about having the complete file name. Popen
82 won't search the %PATH% for you automatically.
83 (Adapted from ctypes.find_library)
84 """
85 # See MSDN for the REAL search order.
86 base, ext = os.path.splitext(name)
87 if ext:
88 exts = [ext]
89 else:
90 exts = ['.bat', '.exe']
91 for directory in os.environ['PATH'].split(os.pathsep):
92 for e in exts:
93 fname = os.path.join(directory, base + e)
94 if os.path.exists(fname):
95 return fname
96 return None
97 else:
98 def find_program(name):
99 """
100 Find the name of the program for Popen.
101 On Unix, Popen isn't picky about having absolute paths.
102 """
103 return name
104
105 def shell_quote(s):
106 if os.name == "nt":
107 q = '"'
108 else:
109 q = "'"
110 return q + s.replace('\\', '\\\\').replace("'", "'\"'\"'") + q
111
112 locale_encoding = locale.getpreferredencoding()
113
114 def run_svn(args, fail_if_stderr=False, encoding="utf-8"):
115 """
116 Run svn cmd in PIPE
117 exit if svn cmd failed
118 """
119 def _transform_arg(a):
120 if isinstance(a, unicode):
121 a = a.encode(encoding or locale_encoding)
122 elif not isinstance(a, str):
123 a = str(a)
124 return a
125 t_args = map(_transform_arg, args)
126
127 cmd = find_program("svn")
128 cmd_string = str(" ".join(map(shell_quote, [cmd] + t_args)))
129 if runsvn_showcmd:
130 print "$", "("+os.getcwd()+")", cmd_string
131 if debug_runsvn_timing:
132 time1 = time.time()
133 pipe = Popen([cmd] + t_args, executable=cmd, stdout=PIPE, stderr=PIPE)
134 out, err = pipe.communicate()
135 if debug_runsvn_timing:
136 time2 = time.time()
137 print "(" + str(round(time2-time1,4)) + " elapsed)"
138 if out and runsvn_showout:
139 print out
140 if pipe.returncode != 0 or (fail_if_stderr and err.strip()):
141 display_error("External program failed (return code %d): %s\n%s"
142 % (pipe.returncode, cmd_string, err))
143 return out
144
145 def svn_date_to_timestamp(svn_date):
146 """
147 Parse an SVN date as read from the XML output and
148 return the corresponding timestamp.
149 """
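# For example (made-up value), an XML date such as "2011-02-19T13:32:14.806800Z"
# is truncated to "2011-02-19T13:32:14" and converted to the matching UTC epoch timestamp.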
150 # Strip microseconds and timezone (always UTC, hopefully)
151 # XXX there are various ISO datetime parsing routines out there,
152 # cf. http://seehuhn.de/comp/pdate
153 date = svn_date.split('.', 2)[0]
154 time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
155 return calendar.timegm(time_tuple)
156
157 def parse_svn_info_xml(xml_string):
158 """
159 Parse the XML output from an "svn info" command and extract
160 useful information as a dict.
161 """
162 d = {}
163 tree = ET.fromstring(xml_string)
164 entry = tree.find('.//entry')
165 if entry:
166 d['url'] = entry.find('url').text
167 d['revision'] = int(entry.get('revision'))
168 d['repos_url'] = tree.find('.//repository/root').text
169 d['repos_uuid'] = tree.find('.//repository/uuid').text
170 d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
171 d['kind'] = entry.get('kind')
172 return d
173
174 def parse_svn_log_xml(xml_string):
175 """
176 Parse the XML output from an "svn log" command and extract
177 useful information as a list of dicts (one per log changeset).
178 """
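# Each returned dict looks roughly like this (illustrative values only):
#   {'revision': 123, 'author': 'alice', 'date': 1298122334, 'message': '...',
#    'changed_paths': [{'path': '/trunk/projectA/file1.txt', 'kind': 'file',
#                       'action': 'A', 'copyfrom_path': None, 'copyfrom_revision': None}]}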
179 l = []
180 tree = ET.fromstring(xml_string)
181 for entry in tree.findall('logentry'):
182 d = {}
183 d['revision'] = int(entry.get('revision'))
184 # Some revisions don't have authors, most notably
185 # the first revision in a repository.
186 author = entry.find('author')
187 d['author'] = author is not None and author.text or None
188 d['date'] = svn_date_to_timestamp(entry.find('date').text)
189 # Some revisions may have empty commit message
190 message = entry.find('msg')
191 message = message is not None and message.text is not None \
192 and message.text.strip() or ""
193 # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
194 d['message'] = message.replace('\r\n', '\n').replace('\n\r', '\n'). \
195 replace('\r', '\n')
196 paths = []
197 for path in entry.findall('.//path'):
198 copyfrom_rev = path.get('copyfrom-rev')
199 if copyfrom_rev:
200 copyfrom_rev = int(copyfrom_rev)
201 paths.append({
202 'path': path.text,
203 'kind': path.get('kind'),
204 'action': path.get('action'),
205 'copyfrom_path': path.get('copyfrom-path'),
206 'copyfrom_revision': copyfrom_rev,
207 })
208 # Need to sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
209 # can process actions in depth-first order.
210 d['changed_paths'] = sorted(paths, key=itemgetter('path'))
211 l.append(d)
212 return l
213
214 def parse_svn_status_xml(xml_string, base_dir=None):
215 """
216 Parse the XML output from an "svn status" command and extract
217 useful info as a list of dicts (one per status entry).
218 """
219 l = []
220 tree = ET.fromstring(xml_string)
221 for entry in tree.findall('.//entry'):
222 d = {}
223 path = entry.get('path')
224 if base_dir is not None:
225 assert path.startswith(base_dir)
226 path = path[len(base_dir):].lstrip('/\\')
227 d['path'] = path
228 wc_status = entry.find('wc-status')
229 if wc_status.get('item') == 'external':
230 d['type'] = 'external'
231 # TODO: Optionally check wc_status.get('item') == 'deleted' and return type='unversioned'?
232 elif wc_status.get('revision') is not None:
233 d['type'] = 'normal'
234 else:
235 d['type'] = 'unversioned'
236 l.append(d)
237 return l
238
239 def get_svn_info(svn_url_or_wc, rev_number=None):
240 """
241 Get SVN information for the given URL or working copy,
242 with an optionally specified revision number.
243 Returns a dict as created by parse_svn_info_xml().
244 """
245 if rev_number is not None:
246 args = [svn_url_or_wc + "@" + str(rev_number)]
247 else:
248 args = [svn_url_or_wc]
249 xml_string = run_svn(svn_info_args + args, fail_if_stderr=True)
250 return parse_svn_info_xml(xml_string)
251
252 def svn_checkout(svn_url, checkout_dir, rev_number=None):
253 """
254 Checkout the given URL at an optional revision number.
255 """
256 args = []
257 if rev_number is not None:
258 args += ['-r', rev_number]
259 args += [svn_url, checkout_dir]
260 return run_svn(svn_checkout_args + args)
261
262 def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True):
263 """
264 Fetch up to 'limit' SVN log entries between the given revisions.
265 """
266 if stop_on_copy:
267 args = ['--stop-on-copy']
268 else:
269 args = []
270 url = str(svn_url_or_wc)
271 if rev_start != 'HEAD' and rev_end != 'HEAD':
272 args += ['-r', '%s:%s' % (rev_start, rev_end)]
273 if "@" not in svn_url_or_wc:
274 url += "@" + str(max(rev_start, rev_end))
275 if get_changed_paths:
276 args += ['-v']
277 args += ['--limit', str(limit), url]
278 xml_string = run_svn(svn_log_args + args)
279 return parse_svn_log_xml(xml_string)
280
281 def get_svn_status(svn_wc, flags=None):
282 """
283 Get SVN status information about the given working copy.
284 """
285 # Ensure proper stripping by canonicalizing the path
286 svn_wc = os.path.abspath(svn_wc)
287 args = []
288 if flags:
289 args += [flags]
290 args += [svn_wc]
291 xml_string = run_svn(svn_status_args + args)
292 return parse_svn_status_xml(xml_string, svn_wc)
293
294 def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True):
295 """
296 Get the first SVN log entry in the requested revision range.
297 """
298 entries = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths)
299 if not entries:
300 display_error("No SVN log for %s between revisions %s and %s" %
301 (svn_url, rev_start, rev_end))
302
303 return entries[0]
304
305 def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
306 """
307 Get the first log entry after/at the given revision number in an SVN branch.
308 By default the revision number is set to 0, which will give you the log
309 entry corresponding to the branch creation.
310
311 NOTE: to know whether the branch creation corresponds to an SVN import or
312 a copy from another branch, inspect elements of the 'changed_paths' entry
313 in the returned dictionary.
314 """
315 return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True, get_changed_paths=True)
316
317 def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
318 """
319 Get the last log entry before/at the given revision number in an SVN branch.
320 By default the revision number is set to HEAD, which will give you the log
321 entry corresponding to the latest commit in branch.
322 """
323 return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True, get_changed_paths=True)
324
325
326 log_duration_threshold = 10.0
327 log_min_chunk_length = 10
328
329 def iter_svn_log_entries(svn_url, first_rev, last_rev):
330 """
331 Iterate over SVN log entries between first_rev and last_rev.
332
333 This function features chunked log fetching so that it isn't too nasty
334 to the SVN server if many entries are requested.
335 """
336 cur_rev = first_rev
337 chunk_length = log_min_chunk_length
338 chunk_interval_factor = 1.0
339 while last_rev == "HEAD" or cur_rev <= last_rev:
340 start_t = time.time()
341 stop_rev = min(last_rev, cur_rev + int(chunk_length * chunk_interval_factor))
342 entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length)
343 duration = time.time() - start_t
344 if not entries:
345 if stop_rev == last_rev:
346 break
347 cur_rev = stop_rev + 1
348 chunk_interval_factor *= 2.0
349 continue
350 for e in entries:
351 yield e
352 cur_rev = e['revision'] + 1
353 # Adapt chunk length based on measured request duration
354 if duration < log_duration_threshold:
355 chunk_length = int(chunk_length * 2.0)
356 elif duration > log_duration_threshold * 2:
357 chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
358
359 def commit_from_svn_log_entry(entry, files=None, keep_author=False):
360 """
361 Given an SVN log entry and an optional sequence of files, do an svn commit.
362 """
363 # TODO: Run optional external shell hook here, for doing pre-commit filtering
364 # This will use the local timezone for displaying commit times
365 timestamp = int(entry['date'])
366 svn_date = str(datetime.fromtimestamp(timestamp))
367 # Uncomment this line if you prefer UTC commit times
368 #svn_date = "%d 0" % timestamp
369 if keep_author:
370 options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date, "--username", entry['author']]
371 else:
372 options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date + "\nAuthor: " + entry['author']]
373 if files:
374 options += list(files)
375 print "(Committing source rev #"+str(entry['revision'])+"...)"
376 run_svn(options)
377
378 def in_svn(p):
379 """
380 Check if a given file/folder is being tracked by Subversion.
381 Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
382 With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of the working-copy.
383 Use "svn status" to check the status of the file/folder.
384 """
385 # TODO: Is there a better way to do this?
386 entries = get_svn_status(p)
387 if not entries:
388 return False
389 d = entries[0]
390 return (d['type'] == 'normal')
391
392 def find_svn_ancestors(source_repos_url, source_base, source_offset, copyfrom_path, copyfrom_rev):
393 """
394 Given a copy-from path (copyfrom_path), walk the SVN history backwards to inspect
395 the ancestry of that path. Build a collection of copyfrom_path+revision pairs
396 for each of the branch-copies since the initial branch-creation. If we find a
397 copyfrom_path which starts with source_base (e.g. we crawled back to
398 the initial branch-copy from trunk), then return the collection of ancestor paths.
399 Otherwise, copyfrom_path has no ancestry relative to source_base.
400
401 This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
402 file/folder was renamed in a branch and then that branch was merged back to trunk.
403
404 PARAMETERS:
405 * source_repos_url = Full URL to root of repository, e.g. 'file:///path/to/repos'
406 * source_base = e.g. '/trunk'
407 * source_offset = e.g. 'projectA/file1.txt'
408 * copyfrom_path = e.g. '/branches/bug123/projectA/file1.txt'
409 """
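# The returned 'ancestors' value is a list of {'path': [base, offset], 'revision': rev}
# dicts (newest copy-from first); process_svn_log_entry() later reverses it into
# chronological order before replaying. Shape shown for illustration only.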
410
411 done = False
412 working_path = copyfrom_path
413 working_base = copyfrom_path[:-len(source_offset)].rstrip('/')
414 working_offset = source_offset.strip('/')
415 working_rev = copyfrom_rev
416 ancestors = [{'path': [working_base, working_offset], 'revision': working_rev}]
417 while not done:
418 # Get the first "svn log" entry for this path (relative to @rev)
419 #working_path = working_base + "/" + working_offset
420 if debug:
421 print ">> find_svn_ancestors: " + source_repos_url + working_path + "@" + str(working_rev) + \
422 " (" + working_base + " " + working_offset + ")"
423 log_entry = get_first_svn_log_entry(source_repos_url + working_path + "@" + str(working_rev), 1, str(working_rev), True)
424 if not log_entry:
425 done = True
426 # Find the action for our working_path in this revision
427 for d in log_entry['changed_paths']:
428 path = d['path']
429 if path not in working_path:
430 continue
431 # Check action-type for this file
432 action = d['action']
433 if action not in 'MARD':
434 display_error("In SVN rev. %d: action '%s' not supported. \
435 Please report a bug!" % (log_entry['revision'], action))
436 if debug:
437 debug_desc = ": " + action + " " + path
438 if d['copyfrom_path']:
439 debug_desc += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
440 print debug_desc
441
442 if action == 'R':
443 # If file/folder was replaced, it has no ancestor
444 return []
445 if action == 'D':
446 # If file/folder was deleted, it has no ancestor
447 return []
448 if action == 'A':
449 # If file/folder was added but not a copy, it has no ancestor
450 if not d['copyfrom_path']:
451 return []
452 # Else, file/folder was added and is a copy, so check ancestors
453 path_old = d['copyfrom_path']
454 working_path = working_path.replace(path, path_old)
455 if working_base in working_path:
456 # If the new and old working_path share the same working_base, just need to update working_offset.
457 working_offset = working_path[len(working_base)+1:]
458 else:
459 # Else, assume that working_base has changed but working_offset is the same, e.g. a re-branch.
460 # TODO: Is this a safe assumption?!
461 working_base = working_path[:-len(working_offset)].rstrip('/')
462 working_rev = d['copyfrom_revision']
463 if debug:
464 print ">> find_svn_ancestors: copy-from: " + working_base + " " + working_offset + "@" + str(working_rev)
465 ancestors.append({'path': [working_base, working_offset], 'revision': working_rev})
466 # If we found a copy-from case which matches our source_base, we're done
467 if (path_old == source_base) or (path_old.startswith(source_base + "/")):
468 return ancestors
469 # Else, follow the copy and keep on searching
470 break
471 return None
472
473 def replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url):
474 """
475 Given an array of ancestor info (find_svn_ancestors), replay the history
476 to correctly track renames ("svn copy/move") across branch-merges.
477
478 For example, consider a sequence of events like this:
479 1. svn copy /trunk /branches/fix1
480 2. (Make some changes on /branches/fix1)
481 3. svn copy /branches/fix1/Proj1 /branches/fix1/Proj2   # Rename folder
482 4. svn copy /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt   # Rename file inside renamed folder
483 5. svn co /trunk && svn merge /branches/fix1
484 After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
485 and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
486 to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
487 we'd lose the logical history that Proj2/file2.txt is really a descendant
488 of Proj1/file1.txt.
489
490 'source_repos_url' is the full URL to the root of the source repository.
491 'ancestors' is the array returned by find_svn_ancestors() with the final
492 destination info appended to it by process_svn_log_entry().
493 'source_url' and 'target_url' are the full URLs to the source/target paths in the source/target repositories.
494 """
495 # Ignore ancestors[0], which is the original (pre-branch-copy) trunk path
496 # Ignore ancestors[1], which is the original branch-creation commit
497 # Ignore ancestors[n], which is the final commit back to trunk
498 for idx in range(1, len(ancestors)-1):
499 ancestor = ancestors[idx]
500 source_base = ancestor['path'][0]
501 source_offset = ancestor['path'][1]
502 source_path = source_base + "/" + source_offset
503 source_rev = ancestor['revision']
504 source_rev_next = ancestors[idx+1]['revision']
505 # Do a "svn log" on the _parent_ directory of source_path, since trying to get log info
506 # for the "old path" on the revision where the copy/move happened will fail.
507 if "/" in source_path:
508 p_source_path = source_path[:source_path.rindex('/')]
509 else:
510 p_source_path = ""
511 if debug:
512 print ">> replay_svn_ancestors: ["+str(idx)+"]" + source_path+"@"+str(source_rev) + " ["+p_source_path+"@"+str(source_rev)+":"+str(source_rev_next-1)+"]"
513 it_log_entries = iter_svn_log_entries(source_repos_url+p_source_path, source_rev, source_rev_next-1)
514 for log_entry in it_log_entries:
515 #print ">> replay_svn_ancestors: log_entry: (" + source_repos_url+source_base + ")"
516 #print log_entry
517 # TODO: Hit a problem case with a rename-situation where the "remove" was committed ahead of the "add (copy)".
518 # Do we maybe need to buffer all the remove's until the end of the entire replay session?
519 # Or can we maybe work around this by passing an explicit rev # into "svn copy"?
520 process_svn_log_entry(log_entry, source_repos_url, source_repos_url+source_base, target_url)
521
522 def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url):
523 """
524 Process SVN changes from the given log entry.
525 Returns array of all the paths in the working-copy that were changed,
526 i.e. the paths which need to be "svn commit".
527
528 'log_entry' is one entry (dict) from the list built by parse_svn_log_xml().
529 'source_repos_url' is the full URL to the root of the source repository.
530 'source_url' is the full URL to the source path in the source repository.
531 'target_url' is the full URL to the target path in the target repository.
532 """
533 # Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
534 source_base = source_url[len(source_repos_url):]
535 if debug:
536 print ">> process_svn_log_entry: " + source_url + " (" + source_base + ")"
537
538 svn_rev = log_entry['revision']
539 # Get current target revision, for "svn copy" support
540 dup_info = get_svn_info(target_url)
541 dup_rev = dup_info['revision']
542
543 removed_paths = []
544 unrelated_paths = []
545 commit_paths = []
546
547 for d in log_entry['changed_paths']:
548 # Get the full path for this changed_path
549 # e.g. u'/branches/bug123/projectA/file1.txt'
550 path = d['path']
551 if not path.startswith(source_base + "/"):
552 # Ignore changed files that are not part of this subdir
553 if path != source_base:
554 print ">> process_svn_log_entry: Unrelated path: " + path + " (" + source_base + ")"
555 unrelated_paths.append(path)
556 continue
557 # Calculate the offset (based on source_base) for this changed_path
558 # e.g. u'projectA/file1.txt'
559 # (path = source_base + "/" + path_offset)
560 path_offset = path[len(source_base):].strip("/")
561 # Get the action for this path
562 action = d['action']
563 if action not in 'MARD':
564 display_error("In SVN rev. %d: action '%s' not supported. \
565 Please report a bug!" % (svn_rev, action))
566
567 # Try to be efficient and keep track of an explicit list of paths in the
568 # working copy that changed. If we commit from the root of the working copy,
569 # then SVN needs to crawl the entire working copy looking for pending changes.
570 # But, if we gather too many paths to commit, then we wipe commit_paths below
571 # and end-up doing a commit at the root of the working-copy.
572 if len (commit_paths) < 100:
573 commit_paths.append(path_offset)
574
575 # Special-handling for replace's
576 is_replace = False
577 if action == 'R':
578 if svnlog_verbose:
579 msg = " " + action + " " + d['path']
580 if d['copyfrom_path']:
581 msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
582 print msg
583 # If file was "replaced" (deleted then re-added, all in same revision),
584 # then we need to run the "svn rm" first, then change action='A'. This
585 # lets the normal code below handle re-"svn add"'ing the files. This
586 # should replicate the "replace".
587 run_svn(["up", path_offset])
588 run_svn(["remove", "--force", path_offset])
589 action = 'A'
590 is_replace = True
591
592 # Handle all the various action-types
593 # (Handle "add" first, for "svn copy/move" support)
594 if action == 'A':
595 if svnlog_verbose:
596 msg = " " + action + " " + d['path']
597 if d['copyfrom_path']:
598 msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
599 print msg
600 # Determine where to export from
601 copyfrom_rev = svn_rev
602 copyfrom_path = path
603 svn_copy = False
604 # Handle cases where this "add" was a copy from another URL in the source repos
605 if d['copyfrom_revision']:
606 copyfrom_rev = d['copyfrom_revision']
607 copyfrom_path = d['copyfrom_path']
608 if debug:
609 print ">> process_svn_log_entry: copy-to: " + source_base + " " + path_offset
610 if source_base in copyfrom_path:
611 # If the copy-from path is inside the current working-copy, no need to check ancestry.
612 ancestors = []
613 copyfrom_path = copyfrom_path[len(source_base):].strip("/")
614 if debug:
615 print ">> process_svn_log_entry: Found copy: " + copyfrom_path+"@"+str(copyfrom_rev)
616 svn_copy = True
617 else:
618 ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset,
619 copyfrom_path, copyfrom_rev)
620 if ancestors:
621 # Reverse the list, so that we loop in chronological order
622 ancestors.reverse()
623 # Append the current revision
624 ancestors.append({'path': [source_base, path_offset], 'revision': svn_rev})
625 # ancestors[0] is the original (pre-branch-copy) trunk path.
626 # ancestors[1] is the first commit on the new branch.
627 copyfrom_rev = ancestors[0]['revision']
628 copyfrom_base = ancestors[0]['path'][0]
629 copyfrom_offset = ancestors[0]['path'][1]
630 copyfrom_path = copyfrom_base + copyfrom_offset
631 if debug:
632 print ">> process_svn_log_entry: FOUND PARENT:"
633 for idx in range(0,len(ancestors)):
634 ancestor = ancestors[idx]
635 print " ["+str(idx)+"] " + ancestor['path'][0]+" "+ancestor['path'][1]+"@"+str(ancestor['revision'])
636 #print ">> process_svn_log_entry: copyfrom_path (before): " + copyfrom_path + " source_base: " + source_base + " p: " + p
637 copyfrom_path = copyfrom_path[len(source_base):].strip("/")
638 #print ">> process_svn_log_entry: copyfrom_path (after): " + copyfrom_path
639 svn_copy = True
640 # If this add was a copy-from, do a smart replay of the ancestors' history.
641 if svn_copy:
642 if debug:
643 print ">> process_svn_log_entry: svn_copy: copy-from: " + copyfrom_path+"@"+str(copyfrom_rev) + " source_base: "+source_base + " len(ancestors): " + str(len(ancestors))
644 # If we don't have any ancestors, then this is just a straight "svn copy" in the current working-copy.
645 if not ancestors:
646 # ...but not if the target is already tracked, because this might run several times for the same path.
647 # TODO: Is there a better way to avoid recursion bugs? Maybe a collection of processed paths?
648 # TODO: The "not in_svn" check creates problems for action="R" cases, e.g. r18834
649 if (not in_svn(path_offset)) or is_replace:
650 if os.path.exists(copyfrom_path):
651 # If the copyfrom_path exists in the working-copy, do a local copy
652 run_svn(["copy", copyfrom_path, path_offset])
653 else:
654 # TODO: This doesn't respect copyfrom_rev at all. Found a case where file was (accidentally?)
655 # deleted in one commit and restored (added copy-from) in a later commit. Do we maybe
656 # need a mapping table of target_url -> source_url rev #'s, so that given a source_url
657 # copyfrom_rev, we can map that to the equiv target_url rev#, so we do the "svn copy"
658 # here correctly?
659 tmp_rev = dup_rev # Kludge for time-being
660 if copyfrom_path == 'Data/Databases/DBUpdate.mdb' and copyfrom_rev == 17568:
661 tmp_rev = dup_rev-10
662 run_svn(["copy", "-r", tmp_rev, target_url+"/"+copyfrom_path+"@"+str(tmp_rev), path_offset])
663 else:
664 if d['kind'] == 'dir':
665 # Replay any actions which happened to this folder from the ancestor path(s).
666 replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url)
667 else:
668 # Just do a straight "svn copy" for files. There isn't any kind of "dependent"
669 # history we might need to replay like for folders.
670 # TODO: Is this logic really correct? Doing a WC vs URL "svn copy" based on existence
671 # of *source* location seems a bit kludgy. Should there be a running list of
672 # renames during replay_svn_ancestors >> process_svn_log_entry?
673 if os.path.exists(copyfrom_path):
674 # If the copyfrom_path exists in the working-copy, do a local copy
675 run_svn(["copy", copyfrom_path, path_offset])
676 else:
677 # Else, could be a situation where replay_svn_ancestors() is replaying branch
678 # history and a copy was committed across two revisions: first the deletion
679 # followed by the later add. In such a case, we need to copy from HEAD (dup_rev)
680 # of the path in *target_url*
681 run_svn(["copy", "-r", dup_rev, target_url+"/"+copyfrom_path+"@"+str(dup_rev), path_offset])
682 # Else just copy/export the files from the source repo and "svn add" them.
683 else:
684 # Create (parent) directory if needed
685 if d['kind'] == 'dir':
686 p_path = path_offset
687 else:
688 p_path = os.path.dirname(path_offset).strip() or '.'
689 if not os.path.exists(p_path):
690 os.makedirs(p_path)
691 # Export the entire added tree.
692 run_svn(["export", "--force", "-r", str(copyfrom_rev),
693 source_repos_url + copyfrom_path + "@" + str(copyfrom_rev), path_offset])
694 # TODO: The "not in_svn" condition here is wrong for replace cases.
695 # Added the in_svn condition here originally since "svn export" is recursive
696 # but "svn log" will have an entry for each indiv file, hence we run into a
697 # cannot-re-add-file-which-is-already-added issue.
698 if (not in_svn(path_offset)) or (is_replace):
699 run_svn(["add", "--parents", path_offset])
700 # TODO: Need to copy SVN properties from source repos
701
702 elif action == 'D':
703 # Queue "svn remove" commands, to allow the action == 'A' handling the opportunity
704 # to do smart "svn copy" handling on copy/move/renames.
705 removed_paths.append(path_offset)
706
707 elif action == 'M':
708 if svnlog_verbose:
709 print " " + action + " " + d['path']
710 out = run_svn(["merge", "-c", str(svn_rev), "--non-recursive",
711 "--non-interactive", "--accept=theirs-full",
712 source_url+"/"+path_offset+"@"+str(svn_rev), path_offset])
713
714 else:
715 display_error("Internal Error: process_svn_log_entry: Unhandled 'action' value: '" + action + "'")
716
717 if removed_paths:
718 for path_offset in removed_paths:
719 if svnlog_verbose:
720 print " D " + source_base+"/"+path_offset
721 run_svn(["remove", "--force", path_offset])
722
723 if unrelated_paths:
724 print "Unrelated paths: (vs. '" + source_base + "')"
725 print "*", unrelated_paths
726
727 return commit_paths
728
729 def pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url, target_url, keep_author=False):
730 """
731 Pull SVN changes from the given log entry.
732 Returns the new SVN revision.
733 If an exception occurs, it will rollback to revision 'svn_rev - 1'.
734 """
735 ## Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
736 #source_base = source_url[len(source_repos_url):]
737
738 svn_rev = log_entry['revision']
739 print "\n(Starting source rev #"+str(svn_rev)+":)"
740 print "r"+str(log_entry['revision']) + " | " + \
741 log_entry['author'] + " | " + \
742 str(datetime.fromtimestamp(int(log_entry['date'])).isoformat(' '))
743 print log_entry['message']
744 print "------------------------------------------------------------------------"
745 commit_paths = process_svn_log_entry(log_entry, source_repos_url, source_url, target_url)
746
747 # If we had too many individual paths to commit, wipe the list and just commit at
748 # the root of the working copy.
749 if len (commit_paths) > 99:
750 commit_paths = []
751
752 # TODO: Use SVN properties to track source URL + rev in the target repo?
753 # This would provide a more reliable resume-support
754 try:
755 commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
756 except ExternalCommandFailed:
757 # try to ignore the Properties conflicts on files and dirs
758 # use the copy from original_wc
759 # TODO: Need to re-work this?
760 #has_Conflict = False
761 #for d in log_entry['changed_paths']:
762 # p = d['path']
763 # p = p[len(source_base):].strip("/")
764 # if os.path.isfile(p):
765 # if os.path.isfile(p + ".prej"):
766 # has_Conflict = True
767 # shutil.copy(original_wc + os.sep + p, p)
768 # p2=os.sep + p.replace('_', '__').replace('/', '_') \
769 # + ".prej-" + str(svn_rev)
770 # shutil.move(p + ".prej", os.path.dirname(original_wc) + p2)
771 # w="\n### Properties conflicts ignored:"
772 # print "%s %s, in revision: %s\n" % (w, p, svn_rev)
773 # elif os.path.isdir(p):
774 # if os.path.isfile(p + os.sep + "dir_conflicts.prej"):
775 # has_Conflict = True
776 # p2=os.sep + p.replace('_', '__').replace('/', '_') \
777 # + "_dir__conflicts.prej-" + str(svn_rev)
778 # shutil.move(p + os.sep + "dir_conflicts.prej",
779 # os.path.dirname(original_wc) + p2)
780 # w="\n### Properties conflicts ignored:"
781 # print "%s %s, in revision: %s\n" % (w, p, svn_rev)
782 # out = run_svn(["propget", "svn:ignore",
783 # original_wc + os.sep + p])
784 # if out:
785 # run_svn(["propset", "svn:ignore", out.strip(), p])
786 # out = run_svn(["propget", "svn:externel",
787 # original_wc + os.sep + p])
788 # if out:
789 # run_svn(["propset", "svn:external", out.strip(), p])
790 ## try again
791 #if has_Conflict:
792 # commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
793 #else:
794 raise ExternalCommandFailed
795
796 # Add source-tracking revprops
797 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_uuid", source_repos_uuid])
798 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_url", source_repos_url])
799 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_rev", svn_rev])
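# (These revprops could later be read back from the target repo, e.g. with
#  "svn propget svn2svn:source_rev --revprop -r HEAD <target_url>", to build the
#  more reliable resume support mentioned in the TODO above.)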
800 print "(Finished source rev #"+str(svn_rev)+")"
801
802
803 def main():
804 usage = "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
805 parser = OptionParser(usage)
806 parser.add_option("-a", "--keep-author", action="store_true",
807 dest="keep_author", help="Keep revision Author or not")
808 parser.add_option("-c", "--continue-from-break", action="store_true",
809 dest="cont_from_break",
810 help="Continue from previous break")
811 parser.add_option("-r", "--svn-rev", type="int", dest="svn_rev",
812 help="SVN revision to checkout from")
813 (options, args) = parser.parse_args()
814 if len(args) != 2:
815 display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
816 False)
817
818 source_url = args.pop(0).rstrip("/")
819 target_url = args.pop(0).rstrip("/")
820 if options.keep_author:
821 keep_author = True
822 else:
823 keep_author = False
824
825 # Find the greatest_rev in the source repo
826 svn_info = get_svn_info(source_url)
827 greatest_rev = svn_info['revision']
828 # Get the base URL for the source repos, e.g. u'svn://svn.example.com/svn/repo'
829 source_repos_url = svn_info['repos_url']
830 # Get the UUID for the source repos
831 source_repos_uuid = svn_info['repos_uuid']
832
833 dup_wc = "_dup_wc"
834
835 # if old working copy does not exist, disable continue mode
836 # TODO: Better continue support. Maybe include source repo's rev # in target commit info?
837 if not os.path.exists(dup_wc):
838 options.cont_from_break = False
839
840 if not options.cont_from_break:
841 # Warn if the target SVN URL already exists
842 cmd = find_program("svn")
843 pipe = Popen([cmd] + ["list"] + [target_url], executable=cmd,
844 stdout=PIPE, stderr=PIPE)
845 out, err = pipe.communicate()
846 if pipe.returncode == 0:
847 print "Target SVN URL: %s already exists!" % target_url
848 if out:
849 print out
850 print "Press 'Enter' to Continue, 'Ctrl + C' to Cancel..."
851 print "(Timeout in 5 seconds)"
852 rfds, wfds, efds = select.select([sys.stdin], [], [], 5)
853
854 # Get log entry for the SVN revision we will check out
855 if options.svn_rev:
856 # If a rev was specified, get the log entry just before or at that rev
857 svn_start_log = get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
858 else:
859 # Otherwise, get log entry of branch creation
860 # TODO: This call is *very* expensive on a repo with lots of revisions.
861 # Even though the call is passing --limit 1, it seems like that limit-filter
862 # is happening after SVN has fetched the full log history.
863 svn_start_log = get_first_svn_log_entry(source_url, 1, greatest_rev, False)
864
865 # This is the revision we will start from for source_url
866 svn_rev = svn_start_log['revision']
867
868 # Check out a working copy of target_url
869 dup_wc = os.path.abspath(dup_wc)
870 if os.path.exists(dup_wc):
871 shutil.rmtree(dup_wc)
872 svn_checkout(target_url, dup_wc)
873 os.chdir(dup_wc)
874
875 # For the initial commit to the target URL, export all the contents from
876 # the source URL at the start-revision.
877 paths = run_svn(["list", "-r", str(svn_rev), source_url+"@"+str(svn_rev)])
878 paths = paths.strip("\n").split("\n")
879 for path in paths:
880 if not path:
881 # Skip null lines
882 continue
883 # Directories have a trailing slash in the "svn list" output
884 if path[-1] == "/":
885 path=path.rstrip('/')
886 if not os.path.exists(path):
887 os.makedirs(path)
888 run_svn(["export", "--force", "-r" , str(svn_rev), source_url+"/"+path+"@"+str(svn_rev), path])
889 run_svn(["add", path])
890 commit_from_svn_log_entry(svn_start_log, [], keep_author)
891 # Add source-tracking revprops
892 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_uuid", source_repos_uuid])
893 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_url", source_repos_url])
894 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_rev", svn_rev])
895 else:
896 dup_wc = os.path.abspath(dup_wc)
897 os.chdir(dup_wc)
898 # TODO: Need better resume support. For the time being, expect caller explicitly passes in resume revision.
899 svn_rev = options.svn_rev
900 if svn_rev < 1:
901 display_error("Invalid arguments\n\nNeed to pass resume rev # (-r) when using continue-mode (-c)", False)
902
903 # Load SVN log starting from svn_rev + 1
904 it_log_entries = iter_svn_log_entries(source_url, svn_rev + 1, greatest_rev)
905
906 try:
907 for log_entry in it_log_entries:
908 # Replay this revision from source_url into target_url
909 pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url, target_url, keep_author)
910 # Update our target working-copy, to ensure everything says it's at the new HEAD revision
911 run_svn(["up", dup_wc])
912
913 except KeyboardInterrupt:
914 print "\nStopped by user."
915 run_svn(["cleanup"])
916 run_svn(["revert", "--recursive", "."])
917 except:
918 print "\nCommand failed with following error:\n"
919 traceback.print_exc()
920 run_svn(["cleanup"])
921 run_svn(["revert", "--recursive", "."])
922 finally:
923 run_svn(["up"])
924 print "\nFinished!"
925
926
927 if __name__ == "__main__":
928 main()
929
930 # vim:sts=4:sw=4: