svn2svn.py

   1 #!/usr/bin/env python
   2 """
   3 svn2svn.py
   4
   5 Replicate (replay) changesets from one SVN repository to another:
   6 * Maintains full logical history (e.g. uses "svn copy" for renames).
   7 * Maintains original commit messages.
   8 * Cannot maintain original commit date, but appends original commit date
   9   for each commit message: "Date: %d".
  10 * Optionally maintain source author info. (Only supported if accessing
  11   target SVN repo via file://)
  12 * Optionally run an external shell script before each replayed commit
  13   to give the ability to dynamically exclude or modify files as part
  14   of the replay.
  15
  16 License: GPLv2, the same as hgsvn.
  17 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
  18 (This is a forked and modified version of http://code.google.com/p/svn2svn/)
  19 """
  20
  21 import os
  22 import sys
  23 import time
  24 import locale
  25 import shutil
  26 import select
  27 import calendar
  28 import traceback
  29
  30 from optparse import OptionParser
  31 from subprocess import Popen, PIPE
  32 from datetime import datetime
  33
  34 try:
  35     from xml.etree import cElementTree as ET
  36 except ImportError:
  37     try:
  38         from xml.etree import ElementTree as ET
  39     except ImportError:
  40         try:
  41             import cElementTree as ET
  42         except ImportError:
  43             from elementtree import ElementTree as ET
  44
  45 svn_log_args = ['log', '--xml', '-v']
  46 svn_info_args = ['info', '--xml']
  47 svn_checkout_args = ['checkout', '-q']
  48 svn_status_args = ['status', '--xml', '-v', '--ignore-externals']
  49
  50 # Setup debug options
  51 debug = False
  52 debug_runsvn_timing = False    # Display how long each "svn" OS command took to run?
  53 # Setup verbosity options
  54 runsvn_showcmd = False    # Display every "svn" OS command we run?
  55 runsvn_showout = False    # Display the stdout results from every  "svn" OS command we run?
  56 svnlog_verbose = True     # Display each action + changed-path as we walk the history?
  57
  58 # define exception class
  59 class ExternalCommandFailed(RuntimeError):
  60     """
  61     An external command failed.
  62     """
  63
  64 def display_error(message, raise_exception = True):
  65     """
  66     Display error message, then terminate.
  67     """
  68     print "Error:", message
  69     print
  70     if raise_exception:
  71         raise ExternalCommandFailed
  72     else:
  73         sys.exit(1)
  74
  75 # Windows compatibility code by Bill Baxter
  76 if os.name == "nt":
  77     def find_program(name):
  78         """
  79         Find the name of the program for Popen.
  80         Windows is finnicky about having the complete file name. Popen
  81         won't search the %PATH% for you automatically.
  82         (Adapted from ctypes.find_library)
  83         """
  84         # See MSDN for the REAL search order.
  85         base, ext = os.path.splitext(name)
  86         if ext:
  87             exts = [ext]
  88         else:
  89             exts = ['.bat', '.exe']
  90         for directory in os.environ['PATH'].split(os.pathsep):
  91             for e in exts:
  92                 fname = os.path.join(directory, base + e)
  93                 if os.path.exists(fname):
  94                     return fname
  95         return None
  96 else:
  97     def find_program(name):
  98         """
  99         Find the name of the program for Popen.
 100         On Unix, popen isn't picky about having absolute paths.
 101         """
 102         return name
 103
 104 def shell_quote(s):
 105     if os.name == "nt":
 106         q = '"'
 107     else:
 108         q = "'"
 109     return q + s.replace('\\', '\\\\').replace("'", "'\"'\"'") + q
 110
 111 locale_encoding = locale.getpreferredencoding()
 112
 113 def run_svn(args, fail_if_stderr=False, encoding="utf-8"):
 114     """
 115     Run svn cmd in PIPE
 116     exit if svn cmd failed
 117     """
 118     def _transform_arg(a):
 119         if isinstance(a, unicode):
 120             a = a.encode(encoding or locale_encoding)
 121         elif not isinstance(a, str):
 122             a = str(a)
 123         return a
 124     t_args = map(_transform_arg, args)
 125
 126     cmd = find_program("svn")
 127     cmd_string = str(" ".join(map(shell_quote, [cmd] + t_args)))
 128     if runsvn_showcmd:
 129         print "$", "("+os.getcwd()+")", cmd_string
 130     if debug_runsvn_timing:
 131         time1 = time.time()
 132     pipe = Popen([cmd] + t_args, executable=cmd, stdout=PIPE, stderr=PIPE)
 133     out, err = pipe.communicate()
 134     if debug_runsvn_timing:
 135         time2 = time.time()
 136         print "(" + str(round(time2-time1,4)) + " elapsed)"
 137     if out and runsvn_showout:
 138         print out
 139     if pipe.returncode != 0 or (fail_if_stderr and err.strip()):
 140         display_error("External program failed (return code %d): %s\n%s"
 141             % (pipe.returncode, cmd_string, err))
 142     return out
 143
 144 def svn_date_to_timestamp(svn_date):
 145     """
 146     Parse an SVN date as read from the XML output and
 147     return the corresponding timestamp.
 148     """
 149     # Strip microseconds and timezone (always UTC, hopefully)
 150     # XXX there are various ISO datetime parsing routines out there,
 151     # cf. http://seehuhn.de/comp/pdate
 152     date = svn_date.split('.', 2)[0]
 153     time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
 154     return calendar.timegm(time_tuple)
 155
 156 def parse_svn_info_xml(xml_string):
 157     """
 158     Parse the XML output from an "svn info" command and extract
 159     useful information as a dict.
 160     """
 161     d = {}
 162     tree = ET.fromstring(xml_string)
 163     entry = tree.find('.//entry')
 164     if entry:
 165         d['url'] = entry.find('url').text
 166         d['revision'] = int(entry.get('revision'))
 167         d['repos_url'] = tree.find('.//repository/root').text
 168         d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
 169         d['kind'] = entry.get('kind')
 170     return d
 171
 172 def parse_svn_log_xml(xml_string):
 173     """
 174     Parse the XML output from an "svn log" command and extract
 175     useful information as a list of dicts (one per log changeset).
 176     """
 177     l = []
 178     tree = ET.fromstring(xml_string)
 179     for entry in tree.findall('logentry'):
 180         d = {}
 181         d['revision'] = int(entry.get('revision'))
 182         # Some revisions don't have authors, most notably
 183         # the first revision in a repository.
 184         author = entry.find('author')
 185         d['author'] = author is not None and author.text or None
 186         d['date'] = svn_date_to_timestamp(entry.find('date').text)
 187         # Some revisions may have empty commit message
 188         message = entry.find('msg')
 189         message = message is not None and message.text is not None \
 190                         and message.text.strip() or ""
 191         # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
 192         d['message'] = message.replace('\r\n', '\n').replace('\n\r', '\n'). \
 193                                replace('\r', '\n')
 194         paths = d['changed_paths'] = []
 195         for path in entry.findall('.//path'):
 196             copyfrom_rev = path.get('copyfrom-rev')
 197             if copyfrom_rev:
 198                 copyfrom_rev = int(copyfrom_rev)
 199             paths.append({
 200                 'path': path.text,
 201                 'kind': path.get('kind'),
 202                 'action': path.get('action'),
 203                 'copyfrom_path': path.get('copyfrom-path'),
 204                 'copyfrom_revision': copyfrom_rev,
 205             })
 206         # Need to sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
 207         # can process actions in depth-first order.
 208         paths.sort()
 209         l.append(d)
 210     return l
 211
 212 def parse_svn_status_xml(xml_string, base_dir=None):
 213     """
 214     Parse the XML output from an "svn status" command and extract
 215     useful info as a list of dicts (one per status entry).
 216     """
 217     l = []
 218     tree = ET.fromstring(xml_string)
 219     for entry in tree.findall('.//entry'):
 220         d = {}
 221         path = entry.get('path')
 222         if base_dir is not None:
 223             assert path.startswith(base_dir)
 224             path = path[len(base_dir):].lstrip('/\\')
 225         d['path'] = path
 226         wc_status = entry.find('wc-status')
 227         if wc_status.get('item') == 'external':
 228             d['type'] = 'external'
 229         elif wc_status.get('revision') is not None:
 230             d['type'] = 'normal'
 231         else:
 232             d['type'] = 'unversioned'
 233         l.append(d)
 234     return l
 235
 236 def get_svn_info(svn_url_or_wc, rev_number=None):
 237     """
 238     Get SVN information for the given URL or working copy,
 239     with an optionally specified revision number.
 240     Returns a dict as created by parse_svn_info_xml().
 241     """
 242     if rev_number is not None:
 243         args = [svn_url_or_wc + "@" + str(rev_number)]
 244     else:
 245         args = [svn_url_or_wc]
 246     xml_string = run_svn(svn_info_args + args, fail_if_stderr=True)
 247     return parse_svn_info_xml(xml_string)
 248
 249 def svn_checkout(svn_url, checkout_dir, rev_number=None):
 250     """
 251     Checkout the given URL at an optional revision number.
 252     """
 253     args = []
 254     if rev_number is not None:
 255         args += ['-r', rev_number]
 256     args += [svn_url, checkout_dir]
 257     return run_svn(svn_checkout_args + args)
 258
 259 def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False):
 260     """
 261     Fetch up to 'limit' SVN log entries between the given revisions.
 262     """
 263     if stop_on_copy:
 264         args = ['--stop-on-copy']
 265     else:
 266         args = []
 267     url = str(svn_url_or_wc)
 268     if rev_start != 'HEAD' and rev_end != 'HEAD':
 269         args += ['-r', '%s:%s' % (rev_start, rev_end)]
 270         if not "@" in svn_url_or_wc:
 271             url += "@" + str(rev_end)
 272     args += ['--limit', str(limit), url]
 273     xml_string = run_svn(svn_log_args + args)
 274     return parse_svn_log_xml(xml_string)
 275
 276 def get_svn_status(svn_wc, flags=None):
 277     """
 278     Get SVN status information about the given working copy.
 279     """
 280     # Ensure proper stripping by canonicalizing the path
 281     svn_wc = os.path.abspath(svn_wc)
 282     args = []
 283     if flags:
 284         args += [flags]
 285     args += [svn_wc]
 286     xml_string = run_svn(svn_status_args + args)
 287     return parse_svn_status_xml(xml_string, svn_wc)
 288
 289 def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False):
 290     """
 291     Get the first SVN log entry in the requested revision range.
 292     """
 293     entries = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy)
 294     if not entries:
 295         display_error("No SVN log for %s between revisions %s and %s" %
 296                       (svn_url, rev_start, rev_end))
 297
 298     return entries[0]
 299
 300 def get_first_svn_log_entry(svn_url, rev_start, rev_end):
 301     """
 302     Get the first log entry after/at the given revision number in an SVN branch.
 303     By default the revision number is set to 0, which will give you the log
 304     entry corresponding to the branch creaction.
 305
 306     NOTE: to know whether the branch creation corresponds to an SVN import or
 307     a copy from another branch, inspect elements of the 'changed_paths' entry
 308     in the returned dictionary.
 309     """
 310     return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True)
 311
 312 def get_last_svn_log_entry(svn_url, rev_start, rev_end):
 313     """
 314     Get the last log entry before/at the given revision number in an SVN branch.
 315     By default the revision number is set to HEAD, which will give you the log
 316     entry corresponding to the latest commit in branch.
 317     """
 318     return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True)
 319
 320
 321 log_duration_threshold = 10.0
 322 log_min_chunk_length = 10
 323
 324 def iter_svn_log_entries(svn_url, first_rev, last_rev):
 325     """
 326     Iterate over SVN log entries between first_rev and last_rev.
 327
 328     This function features chunked log fetching so that it isn't too nasty
 329     to the SVN server if many entries are requested.
 330     """
 331     cur_rev = first_rev
 332     chunk_length = log_min_chunk_length
 333     chunk_interval_factor = 1.0
 334     while last_rev == "HEAD" or cur_rev <= last_rev:
 335         start_t = time.time()
 336         stop_rev = min(last_rev, cur_rev + int(chunk_length * chunk_interval_factor))
 337         entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length)
 338         duration = time.time() - start_t
 339         if not entries:
 340             if stop_rev == last_rev:
 341                 break
 342             cur_rev = stop_rev + 1
 343             chunk_interval_factor *= 2.0
 344             continue
 345         for e in entries:
 346             yield e
 347         cur_rev = e['revision'] + 1
 348         # Adapt chunk length based on measured request duration
 349         if duration < log_duration_threshold:
 350             chunk_length = int(chunk_length * 2.0)
 351         elif duration > log_duration_threshold * 2:
 352             chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
 353
 354 def commit_from_svn_log_entry(entry, files=None, keep_author=False):
 355     """
 356     Given an SVN log entry and an optional sequence of files, do an svn commit.
 357     """
 358     # TODO: Run optional external shell hook here, for doing pre-commit filtering
 359     # This will use the local timezone for displaying commit times
 360     timestamp = int(entry['date'])
 361     svn_date = str(datetime.fromtimestamp(timestamp))
 362     # Uncomment this one one if you prefer UTC commit times
 363     #svn_date = "%d 0" % timestamp
 364     if keep_author:
 365         options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date, "--username", entry['author']]
 366     else:
 367         options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date + "\nAuthor: " + entry['author']]
 368     if files:
 369         options += list(files)
 370     run_svn(options)
 371
 372 def in_svn(p):
 373     """
 374     Check if a given file/folder is being tracked by Subversion.
 375     Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
 376     With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of the working-copy.
 377     Use "svn status" to check the status of the file/folder.
 378     """
 379     # TODO: Is there a better way to do this?
 380     entries = get_svn_status(p)
 381     if not entries:
 382       return False
 383     d = entries[0]
 384     return (d['type'] == 'normal')
 385
 386 def find_svn_ancestors(source_repos_url, source_base, source_offset, copyfrom_path, copyfrom_rev):
 387     """
 388     Given a copy-from path (copyfrom_path), walk the SVN history backwards to inspect
 389     the ancestory of that path. Build a collection of copyfrom_path+revision pairs
 390     for each of the branch-copies since the initial branch-creation.  If we find a
 391     copyfrom_path which source_base is a substring match of (e.g. we crawled back to
 392     the initial branch-copy from trunk), then return the collection of ancestor paths.
 393     Otherwise, copyfrom_path has no ancestory compared to source_base.
 394
 395     This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
 396     file/folder was renamed in a branch and then that branch was merged back to trunk.
 397
 398     PARAMETERS:
 399     * source_repos_url = Full URL to root of repository, e.g. 'file:///path/to/repos'
 400     * source_base = e.g. '/trunk'
 401     * source_offset = e.g. 'projectA/file1.txt'
 402     * copyfrom_path = e.g. '/branches/bug123/projectA/file1.txt'
 403     """
 404
 405     done = False
 406     working_path = copyfrom_path
 407     working_base = copyfrom_path[:-len(source_offset)].rstrip('/')
 408     working_offset = source_offset.strip('/')
 409     working_rev = copyfrom_rev
 410     ancestors = [{'path': [working_base, working_offset], 'revision': working_rev}]
 411     while not done:
 412         # Get the first "svn log" entry for this path (relative to @rev)
 413         #working_path = working_base + "/" + working_offset
 414         if debug:
 415             print ">> find_svn_ancestors: " + source_repos_url + working_path + "@" + str(working_rev) + \
 416                    "  (" + working_base + " " + working_offset + ")"
 417         log_entry = get_first_svn_log_entry(source_repos_url + working_path + "@" + str(working_rev), 1, str(working_rev))
 418         if not log_entry:
 419             done = True
 420         # Find the action for our working_path in this revision
 421         for d in log_entry['changed_paths']:
 422             path = d['path']
 423             if not path in working_path:
 424                 continue
 425             # Check action-type for this file
 426             action = d['action']
 427             if action not in 'MARD':
 428                 display_error("In SVN rev. %d: action '%s' not supported. \
 429                                Please report a bug!" % (log_entry['revision'], action))
 430             if debug:
 431                 debug_desc = ": " + action + " " + path
 432                 if d['copyfrom_path']:
 433                     debug_desc += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
 434                 print debug_desc
 435
 436             if action == 'R':
 437                 # If file/folder was replaced, it has no ancestor
 438                 return []
 439             if action == 'D':
 440                 # If file/folder was deleted, it has no ancestor
 441                 return []
 442             if action == 'A':
 443                 # If file/folder was added but not a copy, it has no ancestor
 444                 if not d['copyfrom_path']:
 445                     return []
 446                 # Else, file/folder was added and is a copy, so check ancestors
 447                 path_old = d['copyfrom_path']
 448                 working_path = working_path.replace(path, path_old)
 449                 if working_base in working_path:
 450                     # If the new and old working_path share the same working_base, just need to update working_offset.
 451                     working_offset = working_path[len(working_base)+1:]
 452                 else:
 453                     # Else, assume that working_base has changed but working_offset is the same, e.g. a re-branch.
 454                     # TODO: Is this a safe assumption?!
 455                     working_base = working_path[:-len(working_offset)].rstrip('/')
 456                 working_rev = d['copyfrom_revision']
 457                 if debug:
 458                     print ">> find_svn_ancestors: copy-from: " + working_base + " " + working_offset + "@" + str(working_rev)
 459                 ancestors.append({'path': [working_base, working_offset], 'revision': working_rev})
 460                 # If we found a copy-from case which matches our source_base, we're done
 461                 if (path_old == source_base) or (path_old.startswith(source_base + "/")):
 462                     return ancestors
 463                 # Else, follow the copy and keep on searching
 464                 break
 465     return None
 466
 467 def replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url):
 468     """
 469     Given an array of ancestor info (find_svn_ancestors), replay the history
 470     to correctly track renames ("svn copy/move") across branch-merges.
 471
 472     For example, consider a sequence of events like this:
 473     1. svn copy /trunk /branches/fix1
 474     2. (Make some changes on /branches/fix1)
 475     3. svn copy /branches/fix1/Proj1 /branches/fix1/Proj2  " Rename folder
 476     4. svn copy /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt  " Rename file inside renamed folder
 477     5. svn co /trunk && svn merge /branches/fix1
 478     After the merge and commit, "svn log -v" with show a delete of /trunk/Proj1
 479     and and add of /trunk/Proj2 comp-from /branches/fix1/Proj2. If we were just
 480     to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
 481     we'd lose the logical history that Proj2/file2.txt is really a descendant
 482     of Proj1/file1.txt.
 483
 484     'source_repos_url' is the full URL to the root of the source repository.
 485     'ancestors' is the array returned by find_svn_ancestors() with the final
 486       destination info appended to it by process_svn_log_entry().
 487     'dest_path'
 488     """
 489     # Ignore ancestors[0], which is the original (pre-branch-copy) trunk path
 490     # Ignore ancestors[1], which is the original branch-creation commit
 491     # Ignore ancestors[n], which is the final commit back to trunk
 492     for idx in range(1, len(ancestors)-1):
 493         ancestor = ancestors[idx]
 494         source_base = ancestor['path'][0]
 495         source_offset = ancestor['path'][1]
 496         source_path = source_base + "/" + source_offset
 497         source_rev = ancestor['revision']
 498         source_rev_next = ancestors[idx+1]['revision']
 499         # Do a "svn log" on the _parent_ directory of source_path, since trying to get log info
 500         # for the "old path" on the revision where the copy/move happened will fail.
 501         if "/" in source_path:
 502             p_source_path = source_path[:source_path.rindex('/')]
 503         else:
 504             p_source_path = ""
 505         if debug:
 506             print ">> replay_svn_ancestors: ["+str(idx)+"]" + source_path+"@"+str(source_rev) + "  ["+p_source_path+"@"+str(source_rev)+":"+str(source_rev_next-1)+"]"
 507         it_log_entries = iter_svn_log_entries(source_repos_url+p_source_path, source_rev, source_rev_next-1)
 508         for log_entry in it_log_entries:
 509             #print ">> replay_svn_ancestors: log_entry: (" + source_repos_url+source_base + ")"
 510             #print log_entry
 511             process_svn_log_entry(log_entry, source_repos_url, source_repos_url+source_base, target_url)
 512
 513 def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url, source_offset=""):
 514     """
 515     Process SVN changes from the given log entry.
 516     Returns array of all the paths in the working-copy that were changed,
 517     i.e. the paths which need to be "svn commit".
 518
 519     'log_entry' is the array structure built by parse_svn_log_xml().
 520     'source_repos_url' is the full URL to the root of the source repository.
 521     'source_url' is the full URL to the source path in the source repository.
 522     'target_url' is the full URL to the target path in the target repository.
 523     """
 524     # Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
 525     source_base = source_url[len(source_repos_url):]
 526     if debug:
 527         print ">> process_svn_log_entry: " + source_url + " (" + source_base + ")"
 528
 529     svn_rev = log_entry['revision']
 530
 531     removed_paths = []
 532     modified_paths = []
 533     unrelated_paths = []
 534     commit_paths = []
 535
 536     for d in log_entry['changed_paths']:
 537         if svnlog_verbose:
 538             msg = " " + d['action'] + " " + d['path']
 539             if d['copyfrom_path']:
 540                 msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
 541             print msg
 542         # Get the full path for this changed_path
 543         # e.g. u'/branches/bug123/projectA/file1.txt'
 544         path = d['path']
 545         if not path.startswith(source_base + "/"):
 546             # Ignore changed files that are not part of this subdir
 547             if path != source_base:
 548                 print ">> process_svn_log_entry: Unrelated path: " + path + "  (" + source_base + ")"
 549                 unrelated_paths.append(path)
 550             continue
 551         # Calculate the offset (based on source_base) for this changed_path
 552         # e.g. u'projectA/file1.txt'
 553         # (path = source_base + "/" + path_offset)
 554         path_offset = path[len(source_base):].strip("/")
 555         # Get the action for this path
 556         action = d['action']
 557         if action not in 'MARD':
 558             display_error("In SVN rev. %d: action '%s' not supported. \
 559                            Please report a bug!" % (svn_rev, action))
 560
 561         # Try to be efficient and keep track of an explicit list of paths in the
 562         # working copy that changed. If we commit from the root of the working copy,
 563         # then SVN needs to crawl the entire working copy looking for pending changes.
 564         # But, if we gather too many paths to commit, then we wipe commit_paths below
 565         # and end-up doing a commit at the root of the working-copy.
 566         if len (commit_paths) < 100:
 567             commit_paths.append(path_offset)
 568
 569         # Special-handling for replace's
 570         if action == 'R':
 571             # If file was "replaced" (deleted then re-added, all in same revision),
 572             # then we need to run the "svn rm" first, then change action='A'. This
 573             # lets the normal code below handle re-"svn add"'ing the files. This
 574             # should replicate the "replace".
 575             run_svn(["up", path_offset])
 576             run_svn(["remove", "--force", path_offset])
 577             action = 'A'
 578
 579         # Handle all the various action-types
 580         # (Handle "add" first, for "svn copy/move" support)
 581         if action == 'A':
 582             # Determine where to export from
 583             copyfrom_rev = svn_rev
 584             copyfrom_path = path
 585             svn_copy = False
 586             # Handle cases where this "add" was a copy from another URL in the source repos
 587             if d['copyfrom_revision']:
 588                 copyfrom_rev = d['copyfrom_revision']
 589                 copyfrom_path = d['copyfrom_path']
 590                 if debug:
 591                     print ">> process_svn_log_entry: copy-to: " + source_base + " " + source_offset + " " + path_offset
 592                 if source_base in copyfrom_path:
 593                     # If the copy-from path is inside the current working-copy, no need to check ancestry.
 594                     ancestors = []
 595                     copyfrom_path = copyfrom_path[len(source_base):].strip("/")
 596                     if debug:
 597                         print ">> process_svn_log_entry: Found copy: " + copyfrom_path+"@"+str(copyfrom_rev)
 598                     svn_copy = True
 599                 else:
 600                     ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset,
 601                                                    copyfrom_path, copyfrom_rev)
 602                 if ancestors:
 603                     # Reverse the list, so that we loop in chronological order
 604                     ancestors.reverse()
 605                     # Append the current revision
 606                     ancestors.append({'path': [source_base, path_offset], 'revision': svn_rev})
 607                     # ancestors[0] is the original (pre-branch-copy) trunk path.
 608                     # ancestors[1] is the first commit on the new branch.
 609                     copyfrom_rev =  ancestors[0]['revision']
 610                     copyfrom_base = ancestors[0]['path'][0]
 611                     copyfrom_offset = ancestors[0]['path'][1]
 612                     copyfrom_path = copyfrom_base + copyfrom_offset
 613                     if debug:
 614                         print ">> process_svn_log_entry: FOUND PARENT:"
 615                         for idx in range(0,len(ancestors)):
 616                             ancestor = ancestors[idx]
 617                             print "     ["+str(idx)+"] " + ancestor['path'][0]+" "+ancestor['path'][1]+"@"+str(ancestor['revision'])
 618                     #print ">> process_svn_log_entry: copyfrom_path (before): " + copyfrom_path + " source_base: " + source_base + " p: " + p
 619                     copyfrom_path = copyfrom_path[len(source_base):].strip("/")
 620                     #print ">> process_svn_log_entry: copyfrom_path (after): " + copyfrom_path
 621                     svn_copy = True
 622             # If this add was a copy-from, do a smart replay of the ancestors' history.
 623             # Else just copy/export the files from the source repo and "svn add" them.
 624             if svn_copy:
 625                 if debug:
 626                     print ">> process_svn_log_entry: svn_copy: copy-from: " + copyfrom_path+"@"+str(copyfrom_rev) + "  source_base: "+source_base + "  len(ancestors): " + str(len(ancestors))
 627                 # If we don't have any ancestors, then this is just a straight "svn copy" in the current working-copy.
 628                 if not ancestors:
 629                     # ...but not if the target is already tracked, because this might run several times for the same path.
 630                     # TODO: Is there a better way to avoid recusion bugs? Maybe a collection of processed paths?
 631                     if not in_svn(path_offset):
 632                         run_svn(["copy", copyfrom_path, path_offset])
 633                 else:
 634                     # Replay any actions which happened to this folder from the ancestor path(s).
 635                     replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url)
 636             else:
 637                 # Create (parent) directory if needed
 638                 if d['kind'] == 'dir':
 639                     p_path = path_offset
 640                 else:
 641                     p_path = os.path.dirname(path_offset).strip() or '.'
 642                 if not os.path.exists(p_path):
 643                     os.makedirs(p_path)
 644                 # Export the entire added tree.
 645                 run_svn(["export", "--force", "-r", str(copyfrom_rev),
 646                          source_repos_url + copyfrom_path + "@" + str(copyfrom_rev), path_offset])
 647                 run_svn(["add", "--parents", path_offset])
 648                 # TODO: Need to copy SVN properties from source repos
 649
 650         elif action == 'D':
 651             # Queue "svn remove" commands, to allow the action == 'A' handling the opportunity
 652             # to do smart "svn copy" handling on copy/move/renames.
 653             removed_paths.append(path_offset)
 654
 655         elif action == 'R':
 656             # TODO
 657             display_error("Internal Error: Handling for action='R' not implemented yet.")
 658
 659         elif action == 'M':
 660             modified_paths.append(path_offset)
 661
 662         else:
 663             display_error("Internal Error: pull_svn_rev: Unhandled 'action' value: '" + action + "'")
 664
 665     if removed_paths:
 666         for r in removed_paths:
 667             # TODO: Is the "svn up" here needed?
 668             run_svn(["up", r])
 669             run_svn(["remove", "--force", r])
 670
 671     if modified_paths:
 672         for m in modified_paths:
 673             # TODO: Is the "svn up" here needed?
 674             run_svn(["up", m])
 675             m_url = source_url + "/" + m
 676             out = run_svn(["merge", "-c", str(svn_rev), "--non-recursive",
 677                      "--non-interactive", "--accept=theirs-full",
 678                      m_url+"@"+str(svn_rev), m])
 679
 680     if unrelated_paths:
 681         print "Unrelated paths: (vs. '" + source_base + "')"
 682         print "*", unrelated_paths
 683
 684     return commit_paths
 685
 686 def pull_svn_rev(log_entry, source_repos_url, source_url, target_url, keep_author=False):
 687     """
 688     Pull SVN changes from the given log entry.
 689     Returns the new SVN revision.
 690     If an exception occurs, it will rollback to revision 'svn_rev - 1'.
 691     """
 692     ## Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
 693     #source_base = source_url[len(source_repos_url):]
 694
 695     svn_rev = log_entry['revision']
 696     print "\n(Starting source rev #"+str(svn_rev)+":)"
 697     print "r"+str(log_entry['revision']) + " | " + \
 698           log_entry['author'] + " | " + \
 699           str(datetime.fromtimestamp(int(log_entry['date'])).isoformat(' '))
 700     print log_entry['message']
 701     print "------------------------------------------------------------------------"
 702     commit_paths = process_svn_log_entry(log_entry, source_repos_url, source_url, target_url)
 703
 704     # If we had too many individual paths to commit, wipe the list and just commit at
 705     # the root of the working copy.
 706     if len (commit_paths) > 99:
 707         commit_paths = []
 708
 709     # TODO: Use SVN properties to track source URL + rev in the target repo?
 710     #       This would provide a more reliable resume-support
 711     try:
 712         commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
 713     except ExternalCommandFailed:
 714         # try to ignore the Properties conflicts on files and dirs
 715         # use the copy from original_wc
 716         # TODO: Need to re-work this?
 717         #has_Conflict = False
 718         #for d in log_entry['changed_paths']:
 719         #    p = d['path']
 720         #    p = p[len(source_base):].strip("/")
 721         #    if os.path.isfile(p):
 722         #        if os.path.isfile(p + ".prej"):
 723         #            has_Conflict = True
 724         #            shutil.copy(original_wc + os.sep + p, p)
 725         #            p2=os.sep + p.replace('_', '__').replace('/', '_') \
 726         #                      + ".prej-" + str(svn_rev)
 727         #            shutil.move(p + ".prej", os.path.dirname(original_wc) + p2)
 728         #            w="\n### Properties conflicts ignored:"
 729         #            print "%s %s, in revision: %s\n" % (w, p, svn_rev)
 730         #    elif os.path.isdir(p):
 731         #        if os.path.isfile(p + os.sep + "dir_conflicts.prej"):
 732         #            has_Conflict = True
 733         #            p2=os.sep + p.replace('_', '__').replace('/', '_') \
 734         #                      + "_dir__conflicts.prej-" + str(svn_rev)
 735         #            shutil.move(p + os.sep + "dir_conflicts.prej",
 736         #                        os.path.dirname(original_wc) + p2)
 737         #            w="\n### Properties conflicts ignored:"
 738         #            print "%s %s, in revision: %s\n" % (w, p, svn_rev)
 739         #            out = run_svn(["propget", "svn:ignore",
 740         #                           original_wc + os.sep + p])
 741         #            if out:
 742         #                run_svn(["propset", "svn:ignore", out.strip(), p])
 743         #            out = run_svn(["propget", "svn:externel",
 744         #                           original_wc + os.sep + p])
 745         #            if out:
 746         #                run_svn(["propset", "svn:external", out.strip(), p])
 747         ## try again
 748         #if has_Conflict:
 749         #    commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
 750         #else:
 751             raise ExternalCommandFailed
 752     print "(Finished source rev #"+str(svn_rev)+")"
 753
 754
 755 def main():
 756     usage = "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
 757     parser = OptionParser(usage)
 758     parser.add_option("-a", "--keep-author", action="store_true",
 759                       dest="keep_author", help="Keep revision Author or not")
 760     parser.add_option("-c", "--continue-from-break", action="store_true",
 761                       dest="cont_from_break",
 762                       help="Continue from previous break")
 763     parser.add_option("-r", "--svn-rev", type="int", dest="svn_rev",
 764                       help="SVN revision to checkout from")
 765     (options, args) = parser.parse_args()
 766     if len(args) != 2:
 767         display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
 768                       False)
 769
 770     source_url = args.pop(0).rstrip("/")
 771     target_url = args.pop(0).rstrip("/")
 772     if options.keep_author:
 773         keep_author = True
 774     else:
 775         keep_author = False
 776
 777     # Find the greatest_rev in the source repo
 778     svn_info = get_svn_info(source_url)
 779     greatest_rev = svn_info['revision']
 780
 781     dup_wc = "_dup_wc"
 782
 783     # if old working copy does not exist, disable continue mode
 784     # TODO: Better continue support. Maybe include source repo's rev # in target commit info?
 785     if not os.path.exists(dup_wc):
 786         options.cont_from_break = False
 787
 788     if not options.cont_from_break:
 789         # Warn if Target SVN URL existed
 790         cmd = find_program("svn")
 791         pipe = Popen([cmd] + ["list"] + [target_url], executable=cmd,
 792                      stdout=PIPE, stderr=PIPE)
 793         out, err = pipe.communicate()
 794         if pipe.returncode == 0:
 795             print "Target SVN URL: %s existed!" % target_url
 796             if out:
 797                 print out
 798             print "Press 'Enter' to Continue, 'Ctrl + C' to Cancel..."
 799             print "(Timeout in 5 seconds)"
 800             rfds, wfds, efds = select.select([sys.stdin], [], [], 5)
 801
 802         # Get log entry for the SVN revision we will check out
 803         if options.svn_rev:
 804             # If specify a rev, get log entry just before or at rev
 805             svn_start_log = get_last_svn_log_entry(source_url, 1, options.svn_rev)
 806         else:
 807             # Otherwise, get log entry of branch creation
 808             svn_start_log = get_first_svn_log_entry(source_url, 1, greatest_rev)
 809
 810         # This is the revision we will start from for source_url
 811         svn_rev = svn_start_log['revision']
 812
 813         # Check out a working copy of target_url
 814         dup_wc = os.path.abspath(dup_wc)
 815         if os.path.exists(dup_wc):
 816             shutil.rmtree(dup_wc)
 817         svn_checkout(target_url, dup_wc)
 818         os.chdir(dup_wc)
 819
 820         # For the initial commit to the target URL, export all the contents from
 821         # the source URL at the start-revision.
 822         paths = run_svn(["list", "-r", str(svn_rev), source_url+"@"+str(svn_rev)])
 823         paths = paths.strip("\n").split("\n")
 824         for path in paths:
 825             if not path:
 826                 # Skip null lines
 827                 break
 828             # Directories have a trailing slash in the "svn list" output
 829             if path[-1] == "/":
 830                 path=path.rstrip('/')
 831                 if not os.path.exists(path):
 832                     os.makedirs(path)
 833             run_svn(["export", "--force", "-r" , str(svn_rev), source_url+"/"+path+"@"+str(svn_rev), path])
 834             run_svn(["add", path])
 835         commit_from_svn_log_entry(svn_start_log, [], keep_author)
 836     else:
 837         dup_wc = os.path.abspath(dup_wc)
 838         os.chdir(dup_wc)
 839
 840     # Get SVN info
 841     svn_info = get_svn_info(source_url)
 842     # Get the base URL for the source repos, e.g. u'svn://svn.example.com/svn/repo'
 843     source_repos_url = svn_info['repos_url']
 844
 845     if options.cont_from_break:
 846         svn_rev = svn_info['revision'] - 1
 847         if svn_rev < 1:
 848             svn_rev = 1
 849
 850     # Load SVN log starting from svn_rev + 1
 851     it_log_entries = iter_svn_log_entries(source_url, svn_rev + 1, greatest_rev)
 852
 853     try:
 854         for log_entry in it_log_entries:
 855             pull_svn_rev(log_entry, source_repos_url, source_url, target_url, keep_author)
 856
 857     except KeyboardInterrupt:
 858         print "\nStopped by user."
 859         run_svn(["cleanup"])
 860         run_svn(["revert", "--recursive", "."])
 861     except:
 862         print "\nCommand failed with following error:\n"
 863         traceback.print_exc()
 864         run_svn(["cleanup"])
 865         run_svn(["revert", "--recursive", "."])
 866     finally:
 867         run_svn(["up"])
 868         print "\nFinished!"
 869
 870
# Run the replay only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
 873
 874 # vim:sts=4:sw=4: