From b6c781db88be2a70293abbb7290b643528cd4aaa Mon Sep 17 00:00:00 2001 From: Tony Duckles Date: Sat, 11 Feb 2012 16:11:03 -0600 Subject: [PATCH 01/16] Updates to README --- README.mkd | 53 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/README.mkd b/README.mkd index ceee159..3ad35fe 100644 --- a/README.mkd +++ b/README.mkd @@ -4,22 +4,22 @@ Replicate (replay) changesets from one Subversion repository to another. Features -------- -- Meant for replaying history into an **empty** target location. This could be - an empty target repo or simply a brand-new folder/branch in the target repo. -- Maintains logical history (when possible), e.g. uses "svn copy" for renames. -- Maintains original commit messages. -- Optionally maintain source commit authors (`svn:author`). (*Requires non-authenticated +- **Meant for replaying history into an empty target location**. This could be + an empty target repo or simply an empty folder/branch in the target repo. +- **Maintains logical history (when possible)**, e.g. uses "svn copy" for renames. +- **Maintains original commit messages**. +- **Optionally maintain source commit authors (`svn:author`)**. Requires non-authenticated access to the target repo, since this relies upon the `--username` SVN - command-line arg.*) -- Optionally maintain source commit timestamps (`svn:date`). (*Requires a + command-line arg. +- **Optionally maintain source commit timestamps (`svn:date`)**. Requires a "pre-revprop-change" hook script in the target repo, to be able to change - the "`svn:date`" revprops after target commits have been made.*) -- Optionally maintain identical revision #'s between source vs. target repo. - (*Effectively requires that you're replaying into an empty target repo, - or rather that the first source_url revision to be replayed is less than - the last target_url revision. 
Create blank "padding" revisions in the target - repo as needed.*) -- Optionally run an external shell script before each replayed commit + the "`svn:date`" revprops after target commits have been made. +- **Optionally maintain identical revision #'s between source vs. target repo**. + Effectively requires that you're replaying into an empty target repo, + or rather that the first source repo revision to be replayed is less than + the last target repo revision. Create blank "padding" revisions in the target + repo as needed. +- **Optionally run an external shell script before each replayed commit**, to give the ability to dynamically exclude or modify files as part of the replay. @@ -38,10 +38,35 @@ history into a new repository, so that things like "svn log" and "svn blame" will still show the correct (logical) history/ancestry, even though we end-up generating new commits which will have newer commit-dates and revision #'s. +While this replay will obviously run faster if you're running between both +a local source and target repositories, none of this *requires* direct +access to the repo server. You could access both the source and target repo's +over standard `http://`, `ssh://`, etc. methods. + Usage ----- See `svn2svn.py --help` +Side Effects +------------ +- The source repo is treated as strictly read-only. We do log/info/export/etc. + actions from the source repo, to get the history to replay and to get the + file contents at each step along the way. +- You must have commit access to the target repo. Additionally, for some of + the optional command-line args, you'll need access to the target repo to + setup hook scripts, e.g. "pre-revprop-change". +- This script will create some folders off of your current working directory: + - "`_wc_target`": This is the checkout of the target\_url, where we replay + actions into and where we commit to the target repo. 
You can safely + remove this directory after a run, and the script will do a fresh + "svn checkout" (if needed) when starting the next time. + - "`_tmp_wc_target`": This is a temporary folder, which will only be created + if using `--keep-revnum` mode and it should only exist for brief periods + of time. This is where we commit dummy/padding revisions to the target repo, + checking out the root folder of the target repo and modifying a + "`svn2svn:keep-revnum`" property, i.e. a small change to trigger a commit + and in a location that will likely go un-noticed in the final target repo. + Examples -------- **Create a copy of only /trunk from source repo, starting at r5000** -- 2.45.2 From 6a875630e09cf3c297f6e91d76aec98c3c75a1a6 Mon Sep 17 00:00:00 2001 From: Tony Duckles Date: Sun, 12 Feb 2012 11:34:47 -0600 Subject: [PATCH 02/16] Correctly handle --keep-revnum mode during initial import --- svn2svn/run/svn2svn.py | 68 ++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/svn2svn/run/svn2svn.py b/svn2svn/run/svn2svn.py index 9cbb87d..833075f 100644 --- a/svn2svn/run/svn2svn.py +++ b/svn2svn/run/svn2svn.py @@ -181,9 +181,9 @@ def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix 'svn_repos_url' is the full URL to the root of the SVN repository, e.g. 'file:///path/to/repo' 'base_path' is the path in the SVN repo to the target path we're trying to - trace ancestry back to, e.g. 'trunk'. + trace ancestry back to, e.g. '/trunk'. 'source_path' is the path in the SVN repo to the source path to start checking - ancestry at, e.g. 'branches/fix1/projectA/file1.txt'. + ancestry at, e.g. '/branches/fix1/projectA/file1.txt'. (full_path = svn_repos_url+base_path+"/"+path_offset) 'source_rev' is the revision to start walking the history of source_path backwards from. 
""" @@ -620,6 +620,32 @@ def process_svn_log_entry(log_entry, commit_paths, prefix = ""): run_svn(["export", "--force", "-r", source_rev, source_url+"/"+path_offset+"@"+str(source_rev), path_offset]) +def keep_revnum(source_rev, target_rev_last, wc_target_tmp): + """ + Add "padding" target revisions as needed to keep source and target + revision #'s identical. + """ + if int(source_rev) <= int(target_rev_last): + raise InternalError("keep-revnum mode is enabled, " + "but source revision (r%s) is less-than-or-equal last target revision (r%s)" % \ + (source_rev, target_rev_last)) + if int(target_rev_last) < int(source_rev)-1: + # Add "padding" target revisions to keep source and target rev #'s identical + if os.path.exists(wc_target_tmp): + shutil.rmtree(wc_target_tmp) + run_svn(["checkout", "-r", "HEAD", "--depth=empty", target_repos_url, wc_target_tmp]) + for rev_num in range(int(target_rev_last)+1, int(source_rev)): + run_svn(["propset", "svn2svn:keep-revnum", rev_num, wc_target_tmp]) + output = run_svn(["commit", "-m", "", wc_target_tmp]) + rev_num_tmp = parse_svn_commit_rev(output) if output else None + assert rev_num == rev_num_tmp + ui.status("Committed revision %s (keep-revnum).", rev_num) + target_rev_last = rev_num + shutil.rmtree(wc_target_tmp) + # Update our target working-copy, to ensure everything says it's at the new HEAD revision + run_svn(["update"]) + return target_rev_last + def disp_svn_log_summary(log_entry): ui.status("------------------------------------------------------------------------") ui.status("r%s | %s | %s", @@ -661,7 +687,7 @@ def real_main(args, parser): # as a sanity check, so we check if the pre-revprop-change hook script is correctly setup # before doing first replay-commit? 
- target_last_rev = target_info['revision'] # Last revision # in the target repo + target_rev_last = target_info['revision'] # Last revision # in the target repo target_repos_url = target_info['repos_url'] wc_target = os.path.abspath('_wc_target') wc_target_tmp = os.path.abspath('_tmp_wc_target') @@ -695,11 +721,12 @@ def real_main(args, parser): source_start_rev = source_rev = int(source_start_log['revision']) ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE) ui.status("") + if options.keep_revnum: + target_rev_last = keep_revnum(source_rev, target_rev_last, wc_target_tmp) # For the initial commit to the target URL, export all the contents from # the source URL at the start-revision. disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_url, source_rev, source_rev)) - ui.status("(Initial import)", level=ui.VERBOSE) # Export and add file-contents from source_url@source_start_rev top_paths = run_svn(["list", "-r", source_rev, source_url+"@"+str(source_rev)]) top_paths = top_paths.strip("\n").split("\n") @@ -727,7 +754,7 @@ def real_main(args, parser): # Directories have a trailing slash in the "svn list" output path_is_dir = True if path[-1] == "/" else False path_offset = path.rstrip('/') if path_is_dir else path - ui.status(" A %s", source_url[len(source_repos_url):]+"/"+path_offset, level=ui.VERBOSE) + ui.status(" A %s", source_base+"/"+path_offset, level=ui.VERBOSE) if options.keep_prop: sync_svn_props(source_url, source_rev, path_offset) # Commit the initial import @@ -740,10 +767,10 @@ def real_main(args, parser): # Update our target working-copy, to ensure everything says it's at the new HEAD revision run_svn(["update"]) commit_count += 1 - target_last_rev = target_rev + target_rev_last = target_rev else: # Re-build the rev_map based on any already-replayed history in target_url - build_rev_map(target_url, target_last_rev, source_info) + build_rev_map(target_url, target_rev_last, source_info) if not rev_map: 
parser.error("called with continue-mode, but no already-replayed source history found in target_url") source_start_rev = int(max(rev_map, key=rev_map.get)) @@ -751,7 +778,7 @@ def real_main(args, parser): ui.status("Continuing from source revision %s.", source_start_rev, level=ui.VERBOSE) ui.status("") - if options.keep_revnum and source_start_rev < target_last_rev: + if options.keep_revnum and source_start_rev < target_rev_last: parser.error("last target revision is equal-or-higher than starting source revision; " "cannot use --keep-revnum mode") @@ -773,24 +800,7 @@ def real_main(args, parser): # Replay this revision from source_url into target_url source_rev = log_entry['revision'] if options.keep_revnum: - if int(source_rev) <= int(target_last_rev): - raise InternalError("keep-revnum mode is enabled, " - "but source revision (r%s) is less-than-or-equal last target revision (r%s)" % \ - (source_rev, target_last_rev)) - if int(target_last_rev) < int(source_rev)-1: - # Add "padding" target revisions to keep source and target rev #'s identical - if os.path.exists(wc_target_tmp): - shutil.rmtree(wc_target_tmp) - run_svn(["checkout", "-r", "HEAD", "--depth=empty", target_repos_url, wc_target_tmp]) - for rev_num in range(int(target_last_rev)+1, int(source_rev)): - run_svn(["propset", "svn2svn:keep-revnum", rev_num, wc_target_tmp]) - output = run_svn(["commit", "-m", "", wc_target_tmp]) - rev_num_tmp = parse_svn_commit_rev(output) if output else None - assert rev_num == rev_num_tmp - ui.status("Committed revision %s (keep-revnum).", rev_num) - target_last_rev = rev_num - shutil.rmtree(wc_target_tmp) - run_svn(["update"]) + target_rev_last = keep_revnum(source_rev, target_rev_last, wc_target_tmp) disp_svn_log_summary(log_entry) # Process all the changed-paths in this log entry commit_paths = [] @@ -803,7 +813,7 @@ def real_main(args, parser): # Update rev_map, mapping table of source-repo rev # -> target-repo rev # source_rev = log_entry['revision'] 
set_rev_map(source_rev, target_rev) - target_last_rev = target_rev + target_rev_last = target_rev # Update our target working-copy, to ensure everything says it's at the new HEAD revision run_svn(["update"]) commit_count += 1 @@ -812,8 +822,8 @@ def real_main(args, parser): run_svn(["cleanup"]) if not source_rev: # If there were no new source_url revisions to process, init source_rev - # for the "finally" message below. - source_rev = source_end_rev + # for the "finally" message below to be the last source revision replayed. + source_rev = source_start_rev except KeyboardInterrupt: print "\nStopped by user." -- 2.45.2 From 93a03c1de2c95f6b822591916639d69b55794eef Mon Sep 17 00:00:00 2001 From: Tony Duckles Date: Wed, 15 Feb 2012 19:42:53 -0600 Subject: [PATCH 03/16] Fix another --keep-revnum edge-case --- svn2svn/run/svn2svn.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/svn2svn/run/svn2svn.py b/svn2svn/run/svn2svn.py index 833075f..51d2788 100644 --- a/svn2svn/run/svn2svn.py +++ b/svn2svn/run/svn2svn.py @@ -671,6 +671,8 @@ def real_main(args, parser): source_repos_url = source_info['repos_url'] # e.g. 'http://server/svn/source' source_base = source_url[len(source_repos_url):] # e.g. '/trunk' source_repos_uuid = source_info['repos_uuid'] + global target_repos_url + target_repos_url = target_info['repos_url'] # Init start and end revision try: @@ -688,7 +690,6 @@ def real_main(args, parser): # before doing first replay-commit? 
target_rev_last = target_info['revision'] # Last revision # in the target repo - target_repos_url = target_info['repos_url'] wc_target = os.path.abspath('_wc_target') wc_target_tmp = os.path.abspath('_tmp_wc_target') num_entries_proc = 0 @@ -721,7 +722,7 @@ def real_main(args, parser): source_start_rev = source_rev = int(source_start_log['revision']) ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE) ui.status("") - if options.keep_revnum: + if options.keep_revnum and source_rev > target_rev_last: target_rev_last = keep_revnum(source_rev, target_rev_last, wc_target_tmp) # For the initial commit to the target URL, export all the contents from -- 2.45.2 From a7a2f5d592f7065e962b74876e87c0ec5f80965c Mon Sep 17 00:00:00 2001 From: Tony Duckles Date: Wed, 15 Feb 2012 14:48:14 -0600 Subject: [PATCH 04/16] Fix a place in find_svn_ancestors() which should've been using is_child_path(). Display progress messages during build_rev_map(), for target repo's with lots of target_url commits to check. --- svn2svn/run/svn2svn.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/svn2svn/run/svn2svn.py b/svn2svn/run/svn2svn.py index 51d2788..5f04c73 100644 --- a/svn2svn/run/svn2svn.py +++ b/svn2svn/run/svn2svn.py @@ -213,7 +213,7 @@ def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix changed_paths_temp = [] for d in log_entry['changed_paths']: path = d['path'] - if path in working_path: + if is_child_path(working_path, path): changed_paths_temp.append({'path': path, 'data': d}) if not changed_paths_temp: # If no matches, then we've hit the end of the chain and this path has no ancestry back to base_path. 
@@ -295,8 +295,9 @@ def get_rev_map(source_rev, prefix): ui.status(prefix + ">> get_rev_map(%s)", source_rev, level=ui.DEBUG, color='GREEN') # Find the highest entry less-than-or-equal-to source_rev for rev in range(int(source_rev), 0, -1): - ui.status(prefix + ">> get_rev_map: rev=%s in_rev_map=%s", rev, str(rev in rev_map), level=ui.DEBUG, color='BLACK_B') - if rev in rev_map: + in_rev_map = True if rev in rev_map else False + ui.status(prefix + ">> get_rev_map: rev=%s in_rev_map=%s", rev, str(in_rev_map), level=ui.DEBUG, color='BLACK_B') + if in_rev_map: return int(rev_map[rev]) # Else, we fell off the bottom of the rev_map. Ruh-roh... return None @@ -328,6 +329,9 @@ def build_rev_map(target_url, target_end_rev, source_info): source_rev = revprops['svn2svn:source_rev'] target_rev = log_entry['revision'] set_rev_map(source_rev, target_rev) + proc_count += 1 + if proc_count % 500 == 0: + ui.status("...processed %s (%s of %s)..." % (proc_count, target_rev, target_end_rev), level=ui.VERBOSE) def get_svn_dirlist(svn_path, rev_number = ""): """ -- 2.45.2 From babf68a770639fad693f5a987c4dfa752bdca26a Mon Sep 17 00:00:00 2001 From: Tony Duckles Date: Sat, 18 Feb 2012 16:35:04 -0600 Subject: [PATCH 05/16] Fix process_svn_log_entry() to not include action="M" paths in skip_paths, so that do_svn_add() will try to check+create correct ancestry. Update process_svn_log_entry() to calculate 'kind' if kind='none'. 
--- svn2svn/run/svn2svn.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/svn2svn/run/svn2svn.py b/svn2svn/run/svn2svn.py index 5f04c73..08cb865 100644 --- a/svn2svn/run/svn2svn.py +++ b/svn2svn/run/svn2svn.py @@ -515,7 +515,7 @@ def process_svn_log_entry(log_entry, commit_paths, prefix = ""): # Ignore changed files that are not part of this subdir ui.status(prefix + ">> process_svn_log_entry: Unrelated path: %s (base: %s)", path, source_base, level=ui.DEBUG, color='GREEN') continue - if d['kind'] == "": + if d['kind'] == "" or d['kind'] == 'none': # The "kind" value was introduced in SVN 1.6, and "svn log --xml" won't return a "kind" # value for commits made on a pre-1.6 repo, even if the server is now running 1.6. # We need to use other methods to fetch the node-kind for these cases. @@ -568,10 +568,13 @@ def process_svn_log_entry(log_entry, commit_paths, prefix = ""): skip_paths = [] for tmp_d in log_entry['changed_paths']: tmp_path = tmp_d['path'] - if is_child_path(tmp_path, path): + if is_child_path(tmp_path, path) and tmp_d['action'] in 'ARD': # Build list of child entries which are also in the changed_paths list, # so that do_svn_add() can skip processing these entries when recursing - # since we'll end-up processing them later. + # since we'll end-up processing them later. Don't include action="M" paths + # in this list because it's non-conclusive: it could just mean that the + # file was modified *after* the copy-from, so we still want do_svn_add() + # to re-create the correct ancestry. 
tmp_path_offset = tmp_path[len(source_base):].strip("/") skip_paths.append(tmp_path_offset) do_svn_add(path_offset, source_rev, "", "", export_paths, path_is_dir, skip_paths, prefix+" ") -- 2.45.2 From 60f5184ab6031730c50612aa70995918274337bd Mon Sep 17 00:00:00 2001 From: Tony Duckles Date: Sat, 18 Feb 2012 16:36:43 -0600 Subject: [PATCH 06/16] Update parse_svn_status_xml() to use defensive conditional rather than assert for base_dir prefix checking --- svn2svn/svnclient.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/svn2svn/svnclient.py b/svn2svn/svnclient.py index 9761648..de9d799 100644 --- a/svn2svn/svnclient.py +++ b/svn2svn/svnclient.py @@ -155,8 +155,7 @@ def parse_svn_status_xml(xml_string, base_dir=None, ignore_externals=False): for entry in tree.findall('.//entry'): d = {} path = entry.get('path') - if base_dir is not None: - assert os.path.normcase(path).startswith(base_dir) + if base_dir is not None and os.path.normcase(path).startswith(base_dir): path = path[len(base_dir):].lstrip('/\\') d['path'] = path wc_status = entry.find('wc-status') -- 2.45.2 From d4d220b2b19a5cc938f8a0a84af43660ce80d801 Mon Sep 17 00:00:00 2001 From: Tony Duckles Date: Sat, 18 Feb 2012 16:42:09 -0600 Subject: [PATCH 07/16] Perf: Only "svn update" when needed. This means a mixed-revision WC, but much-faster replays for large WC's. 
--- svn2svn/run/svn2svn.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/svn2svn/run/svn2svn.py b/svn2svn/run/svn2svn.py index 08cb865..da77332 100644 --- a/svn2svn/run/svn2svn.py +++ b/svn2svn/run/svn2svn.py @@ -303,7 +303,7 @@ def get_rev_map(source_rev, prefix): return None def set_rev_map(source_rev, target_rev): - ui.status(">> set_rev_map: source_rev=%s target_rev=%s", source_rev, target_rev, level=ui.DEBUG, color='GREEN') + #ui.status(">> set_rev_map: source_rev=%s target_rev=%s", source_rev, target_rev, level=ui.DEBUG, color='GREEN') global rev_map rev_map[int(source_rev)]=int(target_rev) @@ -434,6 +434,7 @@ def do_svn_add(path_offset, source_rev, parent_copyfrom_path="", parent_copyfrom if path_in_svn: # If local file is already under version-control, then this is a replace. ui.status(prefix + ">> do_svn_add: pre-copy: local path already exists: %s", path_offset, level=ui.DEBUG, color='GREEN') + run_svn(["update", path_offset]) run_svn(["remove", "--force", path_offset]) run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset]) if is_dir: @@ -494,6 +495,7 @@ def do_svn_add_dir(path_offset, source_rev, parent_copyfrom_path, parent_copyfro for path in paths_local: if not path in paths_remote: ui.status(" %s %s", 'D', source_base+"/"+path_offset+"/"+path, level=ui.VERBOSE) + run_svn(["update", path_offset+"/"+path]) run_svn(["remove", "--force", path_offset+"/"+path]) # TODO: Does this handle deleted folders too? Wouldn't want to have a case # where we only delete all files from folder but leave orphaned folder around. @@ -553,6 +555,10 @@ def process_svn_log_entry(log_entry, commit_paths, prefix = ""): # child folder under that parent folder is a "replace" action on the final # merge to trunk. Since the child folders will be in skip_paths, do_svn_add # wouldn't have created them while processing the parent "add" path. 
+ if path_is_dir: + # Need to "svn update" before "svn remove" in case child contents are at + # a higher rev than the (parent) path_offset. + run_svn(["update", path_offset]) run_svn(["remove", "--force", path_offset]) action = 'A' @@ -608,12 +614,24 @@ def process_svn_log_entry(log_entry, commit_paths, prefix = ""): sync_svn_props(source_url, source_rev, path_offset) elif action == 'D': + if path_is_dir: + # For dirs, need to "svn update" before "svn remove" because the final + # "svn commit" will fail if the parent (path_offset) is at a lower rev + # than any of the child contents. This needs to be a recursive update. + run_svn(["update", path_offset]) run_svn(["remove", "--force", path_offset]) elif action == 'M': if path_is_file: run_svn(["export", "--force", "-N" , "-r", source_rev, source_url+"/"+path_offset+"@"+str(source_rev), path_offset]) + if path_is_dir: + # For dirs, need to "svn update" before export/prop-sync because the + # final "svn commit" will fail if the parent is at a lower rev than + # child contents. Just need to update the rev-state of the dir (d['path']), + # don't need to recursively update all child contents. + # (??? is this the right reason?) 
+ run_svn(["update", "-N", path_offset]) if options.keep_prop: sync_svn_props(source_url, source_rev, path_offset) @@ -649,8 +667,6 @@ def keep_revnum(source_rev, target_rev_last, wc_target_tmp): ui.status("Committed revision %s (keep-revnum).", rev_num) target_rev_last = rev_num shutil.rmtree(wc_target_tmp) - # Update our target working-copy, to ensure everything says it's at the new HEAD revision - run_svn(["update"]) return target_rev_last def disp_svn_log_summary(log_entry): @@ -772,8 +788,6 @@ def real_main(args, parser): if target_rev: # Update rev_map, mapping table of source-repo rev # -> target-repo rev # set_rev_map(source_rev, target_rev) - # Update our target working-copy, to ensure everything says it's at the new HEAD revision - run_svn(["update"]) commit_count += 1 target_rev_last = target_rev else: @@ -822,8 +836,6 @@ def real_main(args, parser): source_rev = log_entry['revision'] set_rev_map(source_rev, target_rev) target_rev_last = target_rev - # Update our target working-copy, to ensure everything says it's at the new HEAD revision - run_svn(["update"]) commit_count += 1 # Run "svn cleanup" every 100 commits if SVN 1.7+, to clean-up orphaned ".svn/pristines/*" if svn_vers >= 1.7 and (commit_count % 100 == 0): -- 2.45.2 From a19aef402ea9a3f447d7b4b55246f6b8b82577c0 Mon Sep 17 00:00:00 2001 From: Tony Duckles Date: Sat, 18 Feb 2012 16:44:50 -0600 Subject: [PATCH 08/16] Ensure target WC is clean/reverted when continuing. 
--- svn2svn/run/svn2svn.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/svn2svn/run/svn2svn.py b/svn2svn/run/svn2svn.py index da77332..b3000ab 100644 --- a/svn2svn/run/svn2svn.py +++ b/svn2svn/run/svn2svn.py @@ -729,6 +729,11 @@ def real_main(args, parser): ui.status("Checking-out _wc_target...", level=ui.VERBOSE) svnclient.svn_checkout(target_url, wc_target) os.chdir(wc_target) + if wc_exists: + # If using an existing WC, make sure it's clean ("svn revert") + ui.status("Cleaning-up _wc_target...", level=ui.VERBOSE) + run_svn(["cleanup"]) + full_svn_revert() if not options.cont_from_break: # TODO: Warn user if trying to start (non-continue) into a non-empty target path? -- 2.45.2 From 4c553f8676513e69ed5e04eb476d699c0737b917 Mon Sep 17 00:00:00 2001 From: Tony Duckles Date: Mon, 5 Mar 2012 22:01:28 -0600 Subject: [PATCH 09/16] Refactor find_svn_ancestors() to make more generic * svn2svn/run/svn2svn.py (find_svn_ancestors): Make 'stop_base_path' optional so this function can be used generally to get the full ancestry history. * svn2svn/svnclient.py (get_first_svn_log_entry, get_last_svn_log_entry): Add optional 'stop_on_copy' param and pass through to get_one_svn_log_entry(). 
--- svn2svn/run/svn2svn.py | 89 +++++++++++++++++++++++------------------- svn2svn/svnclient.py | 8 ++-- 2 files changed, 53 insertions(+), 44 deletions(-) diff --git a/svn2svn/run/svn2svn.py b/svn2svn/run/svn2svn.py index b3000ab..570dc27 100644 --- a/svn2svn/run/svn2svn.py +++ b/svn2svn/run/svn2svn.py @@ -166,46 +166,47 @@ def in_svn(p, require_in_repo=False, prefix=""): def is_child_path(path, p_path): return True if (path == p_path) or (path.startswith(p_path+"/")) else False -def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix = ""): +def find_svn_ancestors(svn_repos_url, start_path, start_rev, stop_base_path="", prefix=""): """ - Given a source path, walk the SVN history backwards to inspect the ancestory of - that path, seeing if it traces back to base_path. Build an array of copyfrom_path - and copyfrom_revision pairs for each of the "svn copies". If we find a copyfrom_path - which base_path is a substring match of (e.g. we crawled back to the initial branch- - copy from trunk), then return the collection of ancestor paths. Otherwise, - copyfrom_path has no ancestory compared to base_path. + Given an initial starting path+rev, walk the SVN history backwards to inspect the + ancestry of that path, optionally seeing if it traces back to stop_base_path. + + Build an array of copyfrom_path and copyfrom_revision pairs for each of the "svn copy"'s. + If we find a copyfrom_path which stop_base_path is a substring match of (e.g. we crawled + back to the initial branch-copy from trunk), then return the collection of ancestor + paths. Otherwise, copyfrom_path has no ancestry compared to stop_base_path. This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a file/folder was renamed in a branch and then that branch was merged back to trunk. 'svn_repos_url' is the full URL to the root of the SVN repository, e.g. 
'file:///path/to/repo' - 'base_path' is the path in the SVN repo to the target path we're trying to - trace ancestry back to, e.g. '/trunk'. - 'source_path' is the path in the SVN repo to the source path to start checking + 'start_path' is the path in the SVN repo to the source path to start checking ancestry at, e.g. '/branches/fix1/projectA/file1.txt'. - (full_path = svn_repos_url+base_path+"/"+path_offset) - 'source_rev' is the revision to start walking the history of source_path backwards from. + 'start_rev' is the revision to start walking the history of start_path backwards from. + 'stop_base_path' is the path in the SVN repo to stop tracing ancestry once we've reached, + i.e. the target path we're trying to trace ancestry back to, e.g. '/trunk'. """ - ui.status(prefix + ">> find_svn_ancestors: Start: (%s) source_path: %s base_path: %s", - svn_repos_url, source_path+"@"+str(source_rev), base_path, level=ui.DEBUG, color='YELLOW') + ui.status(prefix + ">> find_svn_ancestors: Start: (%s) start_path: %s stop_base_path: %s", + svn_repos_url, start_path+"@"+str(start_rev), stop_base_path, level=ui.DEBUG, color='YELLOW') done = False - working_path = base_path+"/"+source_path - working_rev = source_rev + no_ancestry = False + cur_path = start_path + cur_rev = start_rev first_iter_done = False ancestors_temp = [] while not done: - # Get the first "svn log" entry for this path (relative to @rev) - ui.status(prefix + ">> find_svn_ancestors: %s", svn_repos_url + working_path+"@"+str(working_rev), level=ui.DEBUG, color='YELLOW') - log_entry = svnclient.get_first_svn_log_entry(svn_repos_url + working_path, 1, working_rev, True) + # Get the first "svn log" entry for cur_path (relative to @cur_rev) + ui.status(prefix + ">> find_svn_ancestors: %s", svn_repos_url+cur_path+"@"+str(cur_rev), level=ui.DEBUG, color='YELLOW') + log_entry = svnclient.get_first_svn_log_entry(svn_repos_url+cur_path, 1, cur_rev) if not log_entry: ui.status(prefix + ">> find_svn_ancestors: Done: no 
log_entry", level=ui.DEBUG, color='YELLOW') done = True break - # If we found a copy-from case which matches our base_path, we're done. + # If we found a copy-from case which matches our stop_base_path, we're done. # ...but only if we've at least tried to search for the first copy-from path. - if first_iter_done and is_child_path(working_path, base_path): - ui.status(prefix + ">> find_svn_ancestors: Done: Found is_child_path(working_path, base_path) and first_iter_done=True", level=ui.DEBUG, color='YELLOW') + if stop_base_path and first_iter_done and is_child_path(cur_path, stop_base_path): + ui.status(prefix + ">> find_svn_ancestors: Done: Found is_child_path(cur_path, stop_base_path) and first_iter_done=True", level=ui.DEBUG, color='YELLOW') done = True break first_iter_done = True @@ -213,17 +214,18 @@ def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix changed_paths_temp = [] for d in log_entry['changed_paths']: path = d['path'] - if is_child_path(working_path, path): + if is_child_path(cur_path, path): changed_paths_temp.append({'path': path, 'data': d}) if not changed_paths_temp: - # If no matches, then we've hit the end of the chain and this path has no ancestry back to base_path. + # If no matches, then we've hit the end of the ancestry-chain. ui.status(prefix + ">> find_svn_ancestors: Done: No matching changed_paths", level=ui.DEBUG, color='YELLOW') done = True continue # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path. changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True) - # Find the action for our working_path in this revision. Use a loop to check in reverse order, - # so that if the target file/folder is "M" but has a parent folder with an "A" copy-from. + # Find the action for our cur_path in this revision. 
Use a loop to check in reverse order, + # so that if the target file/folder is "M" but has a parent folder with an "A" copy-from + # then we still correctly match the deepest copy-from. for v in changed_paths: d = v['data'] path = d['path'] @@ -236,15 +238,17 @@ def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "", level=ui.DEBUG, color='YELLOW') if action == 'D': - # If file/folder was deleted, it has no ancestor - ancestors_temp = [] + # If file/folder was deleted, ancestry-chain stops here + if stop_base_path: + no_ancestry = True ui.status(prefix + ">> find_svn_ancestors: Done: deleted", level=ui.DEBUG, color='YELLOW') done = True break if action in 'RA': - # If file/folder was added/replaced but not a copy, it has no ancestor + # If file/folder was added/replaced but not a copy, ancestry-chain stops here if not d['copyfrom_path']: - ancestors_temp = [] + if stop_base_path: + no_ancestry = True ui.status(prefix + ">> find_svn_ancestors: Done: %s with no copyfrom_path", "Added" if action == "A" else "Replaced", level=ui.DEBUG, color='YELLOW') @@ -257,19 +261,24 @@ def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix level=ui.DEBUG, color='YELLOW') ancestors_temp.append({'path': path, 'revision': log_entry['revision'], 'copyfrom_path': d['copyfrom_path'], 'copyfrom_rev': d['copyfrom_revision']}) - working_path = working_path.replace(d['path'], d['copyfrom_path']) - working_rev = d['copyfrom_revision'] + cur_path = cur_path.replace(d['path'], d['copyfrom_path']) + cur_rev = d['copyfrom_revision'] # Follow the copy and keep on searching break ancestors = [] + if stop_base_path and no_ancestry: + # If we're tracing back ancestry to a specific target stop_base_path and + # the ancestry-chain stopped before we reached stop_base_path, then return + # nothing since there is no ancestry chaining back to that target. 
+ ancestors_temp = [] if ancestors_temp: - ancestors.append({'path': base_path+"/"+source_path, 'revision': source_rev}) - working_path = base_path+"/"+source_path + ancestors.append({'path': start_path, 'revision': start_rev}) + cur_path = start_path for idx in range(len(ancestors_temp)): d = ancestors_temp[idx] - working_path = working_path.replace(d['path'], d['copyfrom_path']) - working_rev = d['copyfrom_rev'] - ancestors.append({'path': working_path, 'revision': working_rev}) + cur_path = cur_path.replace(d['path'], d['copyfrom_path']) + cur_rev = d['copyfrom_rev'] + ancestors.append({'path': cur_path, 'revision': cur_rev}) if ui.get_level() >= ui.DEBUG: max_len = 0 for idx in range(len(ancestors)): @@ -285,7 +294,7 @@ def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix level=ui.DEBUG, color='YELLOW') else: ui.status(prefix + ">> find_svn_ancestors: No ancestor-chain found: %s", - svn_repos_url+base_path+"/"+source_path+"@"+str(source_rev), level=ui.DEBUG, color='YELLOW') + svn_repos_url+start_path+"@"+str(start_rev), level=ui.DEBUG, color='YELLOW') return ancestors def get_rev_map(source_rev, prefix): @@ -388,13 +397,13 @@ def do_svn_add(path_offset, source_rev, parent_copyfrom_path="", parent_copyfrom level=ui.DEBUG, color='GREEN') # Check if the given path has ancestors which chain back to the current source_base found_ancestor = False - ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset, source_rev, prefix+" ") + ancestors = find_svn_ancestors(source_repos_url, source_base+"/"+path_offset, source_rev, source_base, prefix+" ") # ancestors[n] is the original (pre-branch-copy) trunk path. # ancestors[n-1] is the first commit on the new branch. copyfrom_path = ancestors[len(ancestors)-1]['path'] if ancestors else "" copyfrom_rev = ancestors[len(ancestors)-1]['revision'] if ancestors else "" if ancestors: - # The copy-from path has ancestory back to source_url. 
+ # The copy-from path has ancestry back to source_url. ui.status(prefix + ">> do_svn_add: Check copy-from: Found parent: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN', bold=True) found_ancestor = True diff --git a/svn2svn/svnclient.py b/svn2svn/svnclient.py index de9d799..301a5a1 100644 --- a/svn2svn/svnclient.py +++ b/svn2svn/svnclient.py @@ -264,7 +264,7 @@ def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_c raise EmptySVNLog("No SVN log for %s between revisions %s and %s" % (svn_url, rev_start, rev_end)) -def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True): +def get_first_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True, get_changed_paths=True): """ Get the first log entry after (or at) the given revision number in an SVN branch. By default the revision number is set to 0, which will give you the log @@ -274,15 +274,15 @@ def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True) a copy from another branch, inspect elements of the 'changed_paths' entry in the returned dictionary. """ - return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True, get_changed_paths=True) + return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=stop_on_copy, get_changed_paths=get_changed_paths) -def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True): +def get_last_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True, get_changed_paths=True): """ Get the last log entry before/at the given revision number in an SVN branch. By default the revision number is set to HEAD, which will give you the log entry corresponding to the latest commit in branch. 
""" - return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True, get_changed_paths=True) + return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=stop_on_copy, get_changed_paths=get_changed_paths) log_duration_threshold = 10.0 -- 2.45.2 From c608d0b6b7112b5e356e198aa1ff9cf9dea71d04 Mon Sep 17 00:00:00 2001 From: Tony Duckles Date: Wed, 14 Mar 2012 20:13:32 -0500 Subject: [PATCH 10/16] Correctly crawl source_url's ancestry back to origin, if any * svn2svn/svnclient.py (iter_svn_log_entries): Support new optional 'ancestors' param. Use that to correctly grab revision-chunks, following the ancestry history. * svn2svn/run/svn2svn.py (join_path, in_ancestors): New functions. * svn2svn/run/svn2svn.py (real_main): Use find_svn_ancestors() to get ancestry of source_url and pass that to downstream functions. --- svn2svn/run/svn2svn.py | 115 +++++++++++++++++++++++-------------- svn2svn/svnclient.py | 40 +++++++++++-- tests/check-replay-repo.sh | 1 + tests/make-replay-repo.sh | 5 ++ 4 files changed, 113 insertions(+), 48 deletions(-) diff --git a/svn2svn/run/svn2svn.py b/svn2svn/run/svn2svn.py index 570dc27..40bc204 100644 --- a/svn2svn/run/svn2svn.py +++ b/svn2svn/run/svn2svn.py @@ -125,7 +125,7 @@ def sync_svn_props(source_url, source_rev, path_offset): Carry-forward any unversioned properties from the source repo to the target WC. 
""" - source_props = svnclient.get_all_props(source_url+"/"+path_offset, source_rev) + source_props = svnclient.get_all_props(join_path(source_url, path_offset), source_rev) target_props = svnclient.get_all_props(path_offset) if 'svn:mergeinfo' in source_props: # Never carry-forward "svn:mergeinfo" @@ -166,7 +166,11 @@ def in_svn(p, require_in_repo=False, prefix=""): def is_child_path(path, p_path): return True if (path == p_path) or (path.startswith(p_path+"/")) else False -def find_svn_ancestors(svn_repos_url, start_path, start_rev, stop_base_path="", prefix=""): +def join_path(base, child): + base.rstrip('/') + return base+"/"+child if child else base + +def find_svn_ancestors(svn_repos_url, start_path, start_rev, stop_base_path=None, prefix=""): """ Given an initial starting path+rev, walk the SVN history backwards to inspect the ancestry of that path, optionally seeing if it traces back to stop_base_path. @@ -205,7 +209,7 @@ def find_svn_ancestors(svn_repos_url, start_path, start_rev, stop_base_path="", break # If we found a copy-from case which matches our stop_base_path, we're done. # ...but only if we've at least tried to search for the first copy-from path. 
- if stop_base_path and first_iter_done and is_child_path(cur_path, stop_base_path): + if stop_base_path is not None and first_iter_done and is_child_path(cur_path, stop_base_path): ui.status(prefix + ">> find_svn_ancestors: Done: Found is_child_path(cur_path, stop_base_path) and first_iter_done=True", level=ui.DEBUG, color='YELLOW') done = True break @@ -366,7 +370,16 @@ def add_path(paths, path): if not path_in_list(paths, path): paths.append(path) -def do_svn_add(path_offset, source_rev, parent_copyfrom_path="", parent_copyfrom_rev="", \ +def in_ancestors(ancestors, ancestor): + match = True + for idx in range(len(ancestors)-1, 0, -1): + if int(ancestors[idx]['revision']) > ancestor['revision']: + match = is_child_path(ancestor['path'], ancestors[idx]['path']) + break + return match + +def do_svn_add(source_url, path_offset, source_rev, source_ancestors, \ + parent_copyfrom_path="", parent_copyfrom_rev="", \ export_paths={}, is_dir = False, skip_paths=[], prefix = ""): """ Given the add'd source path, replay the "svn add/copy" commands to correctly @@ -392,12 +405,15 @@ def do_svn_add(path_offset, source_rev, parent_copyfrom_path="", parent_copyfrom 'export_paths' is the list of path_offset's that we've deferred running "svn export" on. 'is_dir' is whether path_offset is a directory (rather than a file). """ - ui.status(prefix + ">> do_svn_add: %s %s", source_base+"/"+path_offset+"@"+str(source_rev), + source_base = source_url[len(source_repos_url):] # e.g. 
'/trunk' + ui.status(prefix + ">> do_svn_add: %s %s", join_path(source_base, path_offset)+"@"+str(source_rev), " (parent-copyfrom: "+parent_copyfrom_path+"@"+str(parent_copyfrom_rev)+")" if parent_copyfrom_path else "", level=ui.DEBUG, color='GREEN') # Check if the given path has ancestors which chain back to the current source_base found_ancestor = False - ancestors = find_svn_ancestors(source_repos_url, source_base+"/"+path_offset, source_rev, source_base, prefix+" ") + ancestors = find_svn_ancestors(source_repos_url, join_path(source_base, path_offset), source_rev, source_base, prefix+" ") + if ancestors and not in_ancestors(source_ancestors, ancestors[len(ancestors)-1]): + ancestors = [] # ancestors[n] is the original (pre-branch-copy) trunk path. # ancestors[n-1] is the first commit on the new branch. copyfrom_path = ancestors[len(ancestors)-1]['path'] if ancestors else "" @@ -439,20 +455,20 @@ def do_svn_add(path_offset, source_rev, parent_copyfrom_path="", parent_copyfrom # If we have a parent copy-from path, we mis-match that so display a status # message describing the action we're mimic'ing. If path_in_svn, then this # is logically a "replace" rather than an "add". - ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), source_base+"/"+path_offset, ancestors[1]['path']+"@"+str(copyfrom_rev), level=ui.VERBOSE) + ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), join_path(source_base, path_offset), ancestors[1]['path']+"@"+str(copyfrom_rev), level=ui.VERBOSE) if path_in_svn: # If local file is already under version-control, then this is a replace. 
ui.status(prefix + ">> do_svn_add: pre-copy: local path already exists: %s", path_offset, level=ui.DEBUG, color='GREEN') run_svn(["update", path_offset]) run_svn(["remove", "--force", path_offset]) - run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset]) + run_svn(["copy", "-r", tgt_rev, join_path(target_url, copyfrom_offset)+"@"+str(tgt_rev), path_offset]) if is_dir: # Export the final verison of all files in this folder. add_path(export_paths, path_offset) else: # Export the final verison of this file. run_svn(["export", "--force", "-r", source_rev, - source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset]) + source_repos_url+join_path(source_base, path_offset)+"@"+str(source_rev), path_offset]) if options.keep_prop: sync_svn_props(source_url, source_rev, path_offset) else: @@ -463,8 +479,8 @@ def do_svn_add(path_offset, source_rev, parent_copyfrom_path="", parent_copyfrom # Create (parent) directory if needed. # TODO: This is (nearly) a duplicate of code in process_svn_log_entry(). Should this be # split-out to a shared tag? - p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or '.' - if not os.path.exists(p_path): + p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or None + if p_path and not os.path.exists(p_path): run_svn(["mkdir", p_path]) if not in_svn(path_offset, prefix=prefix+" "): if is_dir: @@ -474,7 +490,7 @@ def do_svn_add(path_offset, source_rev, parent_copyfrom_path="", parent_copyfrom # Export the final verison of this file. We *need* to do this before running # the "svn add", even if we end-up re-exporting this file again via export_paths. run_svn(["export", "--force", "-r", source_rev, - source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset]) + source_repos_url+join_path(source_base, path_offset)+"@"+str(source_rev), path_offset]) # If not already under version-control, then "svn add" this file/folder. 
run_svn(["add", "--parents", path_offset]) if options.keep_prop: @@ -482,34 +498,39 @@ def do_svn_add(path_offset, source_rev, parent_copyfrom_path="", parent_copyfrom if is_dir: # For any folders that we process, process any child contents, so that we correctly # replay copies/replaces/etc. - do_svn_add_dir(path_offset, source_rev, copyfrom_path, copyfrom_rev, export_paths, skip_paths, prefix+" ") + do_svn_add_dir(source_url, path_offset, source_rev, source_ancestors, + copyfrom_path, copyfrom_rev, export_paths, skip_paths, prefix+" ") -def do_svn_add_dir(path_offset, source_rev, parent_copyfrom_path, parent_copyfrom_rev, \ +def do_svn_add_dir(source_url, path_offset, source_rev, source_ancestors, \ + parent_copyfrom_path, parent_copyfrom_rev, \ export_paths, skip_paths, prefix=""): + source_base = source_url[len(source_repos_url):] # e.g. '/trunk' # Get the directory contents, to compare between the local WC (target_url) vs. the remote repo (source_url) # TODO: paths_local won't include add'd paths because "svn ls" lists the contents of the # associated remote repo folder. (Is this a problem?) 
paths_local = get_svn_dirlist(path_offset) - paths_remote = get_svn_dirlist(source_url+"/"+path_offset, source_rev) + paths_remote = get_svn_dirlist(join_path(source_url, path_offset), source_rev) ui.status(prefix + ">> do_svn_add_dir: paths_local: %s", str(paths_local), level=ui.DEBUG, color='GREEN') ui.status(prefix + ">> do_svn_add_dir: paths_remote: %s", str(paths_remote), level=ui.DEBUG, color='GREEN') # Update files/folders which exist in remote but not local for path in paths_remote: path_is_dir = True if path[-1] == "/" else False - working_path = path_offset+"/"+(path.rstrip('/') if path_is_dir else path) + working_path = join_path(path_offset, (path.rstrip('/') if path_is_dir else path)).lstrip('/') + #print "working_path:%s = path_offset:%s + path:%s" % (working_path, path_offset, path) if not working_path in skip_paths: - do_svn_add(working_path, source_rev, parent_copyfrom_path, parent_copyfrom_rev, + do_svn_add(source_url, working_path, source_rev, source_ancestors, + parent_copyfrom_path, parent_copyfrom_rev, export_paths, path_is_dir, skip_paths, prefix+" ") # Remove files/folders which exist in local but not remote for path in paths_local: if not path in paths_remote: - ui.status(" %s %s", 'D', source_base+"/"+path_offset+"/"+path, level=ui.VERBOSE) - run_svn(["update", path_offset+"/"+path]) - run_svn(["remove", "--force", path_offset+"/"+path]) + ui.status(" %s %s", 'D', join_path(join_path(source_base, path_offset), path), level=ui.VERBOSE) + run_svn(["update", join_path(path_offset, path)]) + run_svn(["remove", "--force", join_path(path_offset, path)]) # TODO: Does this handle deleted folders too? Wouldn't want to have a case # where we only delete all files from folder but leave orphaned folder around. -def process_svn_log_entry(log_entry, commit_paths, prefix = ""): +def process_svn_log_entry(log_entry, ancestors, commit_paths, prefix = ""): """ Process SVN changes from the given log entry. 
Build an array (commit_paths) of the paths in the working-copy that were changed, i.e. the paths which @@ -517,6 +538,8 @@ def process_svn_log_entry(log_entry, commit_paths, prefix = ""): """ export_paths = [] source_rev = log_entry['revision'] + source_url = log_entry['url'] + source_base = source_url[len(source_repos_url):] # e.g. '/trunk' ui.status(prefix + ">> process_svn_log_entry: %s", source_url+"@"+str(source_rev), level=ui.DEBUG, color='GREEN') for d in log_entry['changed_paths']: # Get the full path for this changed_path @@ -592,12 +615,12 @@ def process_svn_log_entry(log_entry, commit_paths, prefix = ""): # to re-create the correct ancestry. tmp_path_offset = tmp_path[len(source_base):].strip("/") skip_paths.append(tmp_path_offset) - do_svn_add(path_offset, source_rev, "", "", export_paths, path_is_dir, skip_paths, prefix+" ") + do_svn_add(source_url, path_offset, source_rev, ancestors, "", "", export_paths, path_is_dir, skip_paths, prefix+" ") # Else just "svn export" the files from the source repo and "svn add" them. else: # Create (parent) directory if needed - p_path = path_offset if path_is_dir else os.path.dirname(path_offset).strip() or '.' - if not os.path.exists(p_path): + p_path = path_offset if path_is_dir else os.path.dirname(path_offset).strip() or None + if p_path and not os.path.exists(p_path): run_svn(["mkdir", p_path]) # Export the entire added tree. if path_is_dir: @@ -614,7 +637,7 @@ def process_svn_log_entry(log_entry, commit_paths, prefix = ""): # Export the final verison of this file. We *need* to do this before running # the "svn add", even if we end-up re-exporting this file again via export_paths. 
run_svn(["export", "--force", "-r", source_rev, - source_url+"/"+path_offset+"@"+str(source_rev), path_offset]) + join_path(source_url, path_offset)+"@"+str(source_rev), path_offset]) if not in_svn(path_offset, prefix=prefix+" "): # Need to use in_svn here to handle cases where client committed the parent # folder and each indiv sub-folder. @@ -633,7 +656,7 @@ def process_svn_log_entry(log_entry, commit_paths, prefix = ""): elif action == 'M': if path_is_file: run_svn(["export", "--force", "-N" , "-r", source_rev, - source_url+"/"+path_offset+"@"+str(source_rev), path_offset]) + join_path(source_url, path_offset)+"@"+str(source_rev), path_offset]) if path_is_dir: # For dirs, need to "svn update" before export/prop-sync because the # final "svn commit" will fail if the parent is at a lower rev than @@ -652,7 +675,7 @@ def process_svn_log_entry(log_entry, commit_paths, prefix = ""): if export_paths: for path_offset in export_paths: run_svn(["export", "--force", "-r", source_rev, - source_url+"/"+path_offset+"@"+str(source_rev), path_offset]) + join_path(source_url, path_offset)+"@"+str(source_rev), path_offset]) def keep_revnum(source_rev, target_rev_last, wc_target_tmp): """ @@ -748,15 +771,19 @@ def real_main(args, parser): # TODO: Warn user if trying to start (non-continue) into a non-empty target path? # Get the first log entry at/after source_start_rev, which is where # we'll do the initial import from. - it_log_start = svnclient.iter_svn_log_entries(source_url, source_start_rev, source_end_rev, get_changed_paths=False) - for source_start_log in it_log_start: + source_ancestors = find_svn_ancestors(source_repos_url, source_base, source_end_rev, prefix=" ") + it_log_start = svnclient.iter_svn_log_entries(source_url, source_start_rev, source_end_rev, get_changed_paths=False, ancestors=source_ancestors) + source_start_log = None + for log_entry in it_log_start: + # Pick the first entry. Need to use a "for ..." loop since we're using an iterator. 
+ source_start_log = log_entry break if not source_start_log: raise InternalError("Unable to find any matching revisions between %s:%s in source_url: %s" % \ (source_start_rev, source_end_rev, source_url)) # This is the revision we will start from for source_url - source_start_rev = source_rev = int(source_start_log['revision']) + source_start_rev = int(source_start_log['revision']) ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE) ui.status("") if options.keep_revnum and source_rev > target_rev_last: @@ -764,9 +791,10 @@ def real_main(args, parser): # For the initial commit to the target URL, export all the contents from # the source URL at the start-revision. - disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_url, source_rev, source_rev)) + disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_repos_url, source_start_rev, source_start_rev)) # Export and add file-contents from source_url@source_start_rev - top_paths = run_svn(["list", "-r", source_rev, source_url+"@"+str(source_rev)]) + source_start_url = source_url if not source_ancestors else source_repos_url+source_ancestors[len(source_ancestors)-1]['path'] + top_paths = run_svn(["list", "-r", source_start_rev, source_start_url+"@"+str(source_start_rev)]) top_paths = top_paths.strip("\n").split("\n") for path in top_paths: # For each top-level file/folder... 
@@ -776,32 +804,32 @@ def real_main(args, parser): path_is_dir = True if path[-1] == "/" else False path_offset = path.rstrip('/') if path_is_dir else path if in_svn(path_offset, prefix=" "): - raise InternalError("Cannot replay history on top of pre-existing structure: %s" % source_url+"/"+path_offset) + raise InternalError("Cannot replay history on top of pre-existing structure: %s" % join_path(source_start_url, path_offset)) if path_is_dir and not os.path.exists(path_offset): os.makedirs(path_offset) - run_svn(["export", "--force", "-r" , source_rev, source_url+"/"+path_offset+"@"+str(source_rev), path_offset]) + run_svn(["export", "--force", "-r" , source_start_rev, join_path(source_start_url, path_offset)+"@"+str(source_start_rev), path_offset]) run_svn(["add", path_offset]) # Update any properties on the newly added content - paths = run_svn(["list", "--recursive", "-r", source_rev, source_url+"@"+str(source_rev)]) + paths = run_svn(["list", "--recursive", "-r", source_start_rev, source_start_url+"@"+str(source_start_rev)]) paths = paths.strip("\n").split("\n") if options.keep_prop: - sync_svn_props(source_url, source_rev, "") + sync_svn_props(source_start_url, source_start_rev, "") for path in paths: if not path: continue # Directories have a trailing slash in the "svn list" output path_is_dir = True if path[-1] == "/" else False path_offset = path.rstrip('/') if path_is_dir else path - ui.status(" A %s", source_base+"/"+path_offset, level=ui.VERBOSE) + ui.status(" A %s", join_path(source_base, path_offset), level=ui.VERBOSE) if options.keep_prop: - sync_svn_props(source_url, source_rev, path_offset) + sync_svn_props(source_start_url, source_start_rev, path_offset) # Commit the initial import num_entries_proc += 1 - target_revprops = gen_tracking_revprops(source_rev) # Build source-tracking revprop's + target_revprops = gen_tracking_revprops(source_start_rev) # Build source-tracking revprop's target_rev = commit_from_svn_log_entry(source_start_log, 
target_revprops=target_revprops) if target_rev: # Update rev_map, mapping table of source-repo rev # -> target-repo rev # - set_rev_map(source_rev, target_rev) + set_rev_map(source_start_rev, target_rev) commit_count += 1 target_rev_last = target_rev else: @@ -822,7 +850,8 @@ def real_main(args, parser): svn_vers = float(".".join(map(str, svn_vers_t[0:2]))) # Load SVN log starting from source_start_rev + 1 - it_log_entries = svnclient.iter_svn_log_entries(source_url, source_start_rev+1, source_end_rev, get_revprops=True) if source_start_rev < source_end_rev else [] + source_ancestors = find_svn_ancestors(source_repos_url, source_base, source_end_rev, prefix=" ") + it_log_entries = svnclient.iter_svn_log_entries(source_url, source_start_rev+1, source_end_rev, get_revprops=True, ancestors=source_ancestors) if source_start_rev < source_end_rev else [] source_rev = None # TODO: Now that commit_from_svn_log_entry() might try to do a "svn propset svn:date", @@ -835,12 +864,14 @@ def real_main(args, parser): break # Replay this revision from source_url into target_url source_rev = log_entry['revision'] + log_url = log_entry['url'] + #print "source_url:%s log_url:%s" % (source_url, log_url) if options.keep_revnum: target_rev_last = keep_revnum(source_rev, target_rev_last, wc_target_tmp) disp_svn_log_summary(log_entry) # Process all the changed-paths in this log entry commit_paths = [] - process_svn_log_entry(log_entry, commit_paths) + process_svn_log_entry(log_entry, source_ancestors, commit_paths) num_entries_proc += 1 # Commit any changes made to _wc_target target_revprops = gen_tracking_revprops(source_rev) # Build source-tracking revprop's diff --git a/svn2svn/svnclient.py b/svn2svn/svnclient.py index 301a5a1..7585335 100644 --- a/svn2svn/svnclient.py +++ b/svn2svn/svnclient.py @@ -289,14 +289,15 @@ log_duration_threshold = 10.0 log_min_chunk_length = 10 log_max_chunk_length = 10000 -def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, 
get_changed_paths=True, get_revprops=False): +def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_changed_paths=True, get_revprops=False, ancestors=[]): """ Iterate over SVN log entries between first_rev and last_rev. This function features chunked log fetching so that it isn't too nasty to the SVN server if many entries are requested. - NOTE: This chunked log fetching *ONLY* works correctly on paths which + NOTE: If *not* passing in the explicit (pre-calculated) 'ancestors' list, + this chunked log fetching *ONLY* works correctly on paths which are known to have existed unbroken in the SVN repository, e.g. /trunk. Chunked fetching breaks down if a path existed in earlier, then was deleted, and later was re-created. For example, if path was created in r5, @@ -305,24 +306,51 @@ def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_c --> would yield r5, i.e. the _initial_ creation svn log --stop-on-copy --limit 1 -r 1:HEAD "path/to/file" --> would yield r5000, i.e. the _re-creation_ - In theory this might work if we always search "backwards", searching from - the end going forward rather than forward going to the end... + Use find_svn_ancestors() to pass in the 'ancestors' array so that + we can correctly re-trace ancestry here. 
""" + info = get_svn_info(svn_url) + svn_repos_url = info['repos_url'] if last_rev == "HEAD": - info = get_svn_info(svn_url) last_rev = info['revision'] + cur_url = svn_url cur_rev = first_rev + cur_anc_idx = None + cur_anc_end_rev = None + if ancestors: + #print ancestors + for idx in range(len(ancestors)-1, 0, -1): + if int(ancestors[idx]['revision']) > first_rev: + #print "Match ancestors["+str(idx)+"]" + cur_url = svn_repos_url+ancestors[idx]['path'] + cur_anc_end_rev = ancestors[idx]['revision'] + cur_anc_idx = idx + break chunk_length = log_min_chunk_length while cur_rev <= last_rev: + #print "cur_rev:%s cur_anc_end_rev:%s cur_anc_idx:%s" % (cur_rev, str(cur_anc_end_rev), cur_anc_idx) + if cur_anc_end_rev and cur_rev >= cur_anc_end_rev: + cur_anc_idx -= 1 + if cur_anc_idx >= 0: + idx = cur_anc_idx + #print "Match ancestors["+str(idx)+"]" + cur_url = svn_repos_url+ancestors[idx]['path'] + cur_anc_end_rev = ancestors[idx]['revision'] + else: + cur_anc_end_rev = None start_t = time.time() stop_rev = min(last_rev, cur_rev + chunk_length) - entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length, + stop_rev = min(stop_rev, cur_anc_end_rev) if cur_anc_end_rev else stop_rev + entries = run_svn_log(cur_url, cur_rev, stop_rev, chunk_length, stop_on_copy, get_changed_paths, get_revprops) duration = time.time() - start_t if entries: for e in entries: if e['revision'] > last_rev: break + # Embed the current URL in the yielded dict, for ancestor cases where + # we might have followed a copy-from to some non-original URL. 
+ e['url'] = cur_url yield e if e['revision'] >= last_rev: break diff --git a/tests/check-replay-repo.sh b/tests/check-replay-repo.sh index cf2101b..7c74a82 100755 --- a/tests/check-replay-repo.sh +++ b/tests/check-replay-repo.sh @@ -11,6 +11,7 @@ found_diff=0 # Note: We assume that the replay working-copy ("_wc_target") still exists from make-replay-repo.sh #svn co -q file://$PWD/_repo_ref $WCREF svn co -q file://$PWD/_repo_ref/trunk $WCREF +#svn co -q file://$PWD/_repo_ref/trunk/Module2/ProjectB $WCREF # Check if the final list of files is the same echo ">> Checking file-list..." diff --git a/tests/make-replay-repo.sh b/tests/make-replay-repo.sh index 621e949..367d911 100755 --- a/tests/make-replay-repo.sh +++ b/tests/make-replay-repo.sh @@ -23,3 +23,8 @@ echo "" # svn2svn /trunk svn mkdir -q -m "Add /trunk" $REPOURL/trunk ../svn2svn.py $* file://$PWD/_repo_ref/trunk file://$PWD/_repo_replay/trunk + +## svn2svn /trunk/Module2/ProjectB +#svn mkdir -q -m "Add /trunk" $REPOURL/trunk +#svn mkdir -q --parents -m "Add /trunk/Module2/ProjectB" $REPOURL/trunk/Module2/ProjectB +#../svn2svn.py $* file://$PWD/_repo_ref/trunk/Module2/ProjectB file://$PWD/_repo_replay/trunk/Module2/ProjectB -- 2.45.2 From 8e4b538fab45c1cecad2c938050f42d451b43b40 Mon Sep 17 00:00:00 2001 From: Tony Duckles Date: Wed, 14 Mar 2012 22:41:00 -0500 Subject: [PATCH 11/16] Fix iter_svn_log_entries ancestry-handling problems Change return format of find_svn_ancestors() to include copyfrom_path+copyfrom_rev info. We need this in iter_svn_log_entries() for correctly "svn log"'ing over only valid ancestry. Previously, we were basically assuming that copyfrom_rev was always revision-1, and trying to "svn log" on that revision-range can yield errors. * svn2svn/run/svn2svn.py (find_svn_ancestors): Change return format so that each entry in the array is a path+revision+copyfrom_path+copyfrom_rev tuplet. * svn2svn/run/svn2svn.py (do_svn_add): Minor changes to support new find_svn_ancestors() return format. 
* svn2svn/run/svn2svn.py (do_svn_add_dir): Fix local-not-remote handling, to correctly handle directories. --- svn2svn/run/svn2svn.py | 52 ++++++++++++++++++------------------------ svn2svn/svnclient.py | 23 ++++++++++++------- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/svn2svn/run/svn2svn.py b/svn2svn/run/svn2svn.py index 40bc204..2259351 100644 --- a/svn2svn/run/svn2svn.py +++ b/svn2svn/run/svn2svn.py @@ -198,7 +198,7 @@ def find_svn_ancestors(svn_repos_url, start_path, start_rev, stop_base_path=None cur_path = start_path cur_rev = start_rev first_iter_done = False - ancestors_temp = [] + ancestors = [] while not done: # Get the first "svn log" entry for cur_path (relative to @cur_rev) ui.status(prefix + ">> find_svn_ancestors: %s", svn_repos_url+cur_path+"@"+str(cur_rev), level=ui.DEBUG, color='YELLOW') @@ -263,38 +263,29 @@ def find_svn_ancestors(svn_repos_url, start_path, start_rev, stop_base_path=None ui.status(prefix + ">> find_svn_ancestors: Found copy-from (action=%s): %s --> %s", action, path, d['copyfrom_path']+"@"+str(d['copyfrom_revision']), level=ui.DEBUG, color='YELLOW') - ancestors_temp.append({'path': path, 'revision': log_entry['revision'], - 'copyfrom_path': d['copyfrom_path'], 'copyfrom_rev': d['copyfrom_revision']}) + ancestors.append({'path': cur_path, 'revision': log_entry['revision'], + 'copyfrom_path': cur_path.replace(d['path'], d['copyfrom_path']), 'copyfrom_rev': d['copyfrom_revision']}) cur_path = cur_path.replace(d['path'], d['copyfrom_path']) cur_rev = d['copyfrom_revision'] # Follow the copy and keep on searching break - ancestors = [] if stop_base_path and no_ancestry: # If we're tracing back ancestry to a specific target stop_base_path and # the ancestry-chain stopped before we reached stop_base_path, then return # nothing since there is no ancestry chaining back to that target. 
- ancestors_temp = [] - if ancestors_temp: - ancestors.append({'path': start_path, 'revision': start_rev}) - cur_path = start_path - for idx in range(len(ancestors_temp)): - d = ancestors_temp[idx] - cur_path = cur_path.replace(d['path'], d['copyfrom_path']) - cur_rev = d['copyfrom_rev'] - ancestors.append({'path': cur_path, 'revision': cur_rev}) + ancestors = [] + if ancestors: if ui.get_level() >= ui.DEBUG: max_len = 0 for idx in range(len(ancestors)): d = ancestors[idx] max_len = max(max_len, len(d['path']+"@"+str(d['revision']))) ui.status(prefix + ">> find_svn_ancestors: Found parent ancestors:", level=ui.DEBUG, color='YELLOW_B') - for idx in range(len(ancestors)-1): + for idx in range(len(ancestors)): d = ancestors[idx] - d_next = ancestors[idx+1] - ui.status(prefix + " [%s] %s <-- %s", idx, + ui.status(prefix + " [%s] %s --> %s", idx, str(d['path']+"@"+str(d['revision'])).ljust(max_len), - str(d_next['path']+"@"+str(d_next['revision'])).ljust(max_len), + str(d['copyfrom_path']+"@"+str(d['copyfrom_rev'])), level=ui.DEBUG, color='YELLOW') else: ui.status(prefix + ">> find_svn_ancestors: No ancestor-chain found: %s", @@ -411,14 +402,13 @@ def do_svn_add(source_url, path_offset, source_rev, source_ancestors, \ level=ui.DEBUG, color='GREEN') # Check if the given path has ancestors which chain back to the current source_base found_ancestor = False - ancestors = find_svn_ancestors(source_repos_url, join_path(source_base, path_offset), source_rev, source_base, prefix+" ") - if ancestors and not in_ancestors(source_ancestors, ancestors[len(ancestors)-1]): - ancestors = [] - # ancestors[n] is the original (pre-branch-copy) trunk path. - # ancestors[n-1] is the first commit on the new branch. 
- copyfrom_path = ancestors[len(ancestors)-1]['path'] if ancestors else "" - copyfrom_rev = ancestors[len(ancestors)-1]['revision'] if ancestors else "" - if ancestors: + ancestors = find_svn_ancestors(source_repos_url, join_path(source_base, path_offset), source_rev, stop_base_path=source_base, prefix=prefix+" ") + ancestor = ancestors[len(ancestors)-1] if ancestors else None # Choose the eldest ancestor, i.e. where we reached stop_base_path=source_base + if ancestor and not in_ancestors(source_ancestors, ancestor): + ancestor = None + copyfrom_path = ancestor['copyfrom_path'] if ancestor else "" + copyfrom_rev = ancestor['copyfrom_rev'] if ancestor else "" + if ancestor: # The copy-from path has ancestry back to source_url. ui.status(prefix + ">> do_svn_add: Check copy-from: Found parent: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN', bold=True) @@ -455,7 +445,7 @@ def do_svn_add(source_url, path_offset, source_rev, source_ancestors, \ # If we have a parent copy-from path, we mis-match that so display a status # message describing the action we're mimic'ing. If path_in_svn, then this # is logically a "replace" rather than an "add". - ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), join_path(source_base, path_offset), ancestors[1]['path']+"@"+str(copyfrom_rev), level=ui.VERBOSE) + ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), join_path(source_base, path_offset), ancestors[0]['copyfrom_path']+"@"+str(copyfrom_rev), level=ui.VERBOSE) if path_in_svn: # If local file is already under version-control, then this is a replace. 
ui.status(prefix + ">> do_svn_add: pre-copy: local path already exists: %s", path_offset, level=ui.DEBUG, color='GREEN') @@ -524,9 +514,11 @@ def do_svn_add_dir(source_url, path_offset, source_rev, source_ancestors, \ # Remove files/folders which exist in local but not remote for path in paths_local: if not path in paths_remote: - ui.status(" %s %s", 'D', join_path(join_path(source_base, path_offset), path), level=ui.VERBOSE) - run_svn(["update", join_path(path_offset, path)]) - run_svn(["remove", "--force", join_path(path_offset, path)]) + path_is_dir = True if path[-1] == "/" else False + working_path = join_path(path_offset, (path.rstrip('/') if path_is_dir else path)).lstrip('/') + ui.status(" %s %s", 'D', join_path(source_base, working_path), level=ui.VERBOSE) + run_svn(["update", working_path]) + run_svn(["remove", "--force", working_path]) # TODO: Does this handle deleted folders too? Wouldn't want to have a case # where we only delete all files from folder but leave orphaned folder around. @@ -793,7 +785,7 @@ def real_main(args, parser): # the source URL at the start-revision. disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_repos_url, source_start_rev, source_start_rev)) # Export and add file-contents from source_url@source_start_rev - source_start_url = source_url if not source_ancestors else source_repos_url+source_ancestors[len(source_ancestors)-1]['path'] + source_start_url = source_url if not source_ancestors else source_repos_url+source_ancestors[len(source_ancestors)-1]['copyfrom_path'] top_paths = run_svn(["list", "-r", source_start_rev, source_start_url+"@"+str(source_start_rev)]) top_paths = top_paths.strip("\n").split("\n") for path in top_paths: diff --git a/svn2svn/svnclient.py b/svn2svn/svnclient.py index 7585335..9faef8e 100644 --- a/svn2svn/svnclient.py +++ b/svn2svn/svnclient.py @@ -306,13 +306,18 @@ def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_c --> would yield r5, i.e. 
the _initial_ creation svn log --stop-on-copy --limit 1 -r 1:HEAD "path/to/file" --> would yield r5000, i.e. the _re-creation_ - Use find_svn_ancestors() to pass in the 'ancestors' array so that - we can correctly re-trace ancestry here. + Use run/svn2svn.py:find_svn_ancestors() to pass in the 'ancestors' array + so that we can correctly re-trace ancestry here. """ info = get_svn_info(svn_url) svn_repos_url = info['repos_url'] if last_rev == "HEAD": last_rev = info['revision'] + if first_rev == "1": + start_log = get_first_svn_log_entry(svn_url, first_rev, last_rev, stop_on_copy=stop_on_copy, get_changed_paths=False) + if start_log['revision'] > first_rev: + first_rev = start_log['revision'] + #print "first_rev: %s" % first_rev cur_url = svn_url cur_rev = first_rev cur_anc_idx = None @@ -321,23 +326,25 @@ def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_c #print ancestors for idx in range(len(ancestors)-1, 0, -1): if int(ancestors[idx]['revision']) > first_rev: - #print "Match ancestors["+str(idx)+"]" - cur_url = svn_repos_url+ancestors[idx]['path'] - cur_anc_end_rev = ancestors[idx]['revision'] + #print "Match ancestors[%s]: %s" % (idx, ancestors[idx]) + cur_url = svn_repos_url+ancestors[idx]['copyfrom_path'] + cur_anc_end_rev = ancestors[idx]['copyfrom_rev'] cur_anc_idx = idx break chunk_length = log_min_chunk_length while cur_rev <= last_rev: #print "cur_rev:%s cur_anc_end_rev:%s cur_anc_idx:%s" % (cur_rev, str(cur_anc_end_rev), cur_anc_idx) if cur_anc_end_rev and cur_rev >= cur_anc_end_rev: + cur_rev = ancestors[cur_anc_idx]['revision'] cur_anc_idx -= 1 if cur_anc_idx >= 0: idx = cur_anc_idx - #print "Match ancestors["+str(idx)+"]" - cur_url = svn_repos_url+ancestors[idx]['path'] - cur_anc_end_rev = ancestors[idx]['revision'] + #print "Match ancestors[%s]: %s" % (idx, ancestors[idx]) + cur_url = svn_repos_url+ancestors[idx]['copyfrom_path'] + cur_anc_end_rev = ancestors[idx]['copyfrom_rev'] else: cur_anc_end_rev = None + #print 
"cur_rev:%s cur_anc_end_rev:%s cur_anc_idx:%s" % (cur_rev, str(cur_anc_end_rev), cur_anc_idx) start_t = time.time() stop_rev = min(last_rev, cur_rev + chunk_length) stop_rev = min(stop_rev, cur_anc_end_rev) if cur_anc_end_rev else stop_rev -- 2.45.2 From 4216a57efaa97b8b72cf16ddec26d8b3f80fe774 Mon Sep 17 00:00:00 2001 From: Tony Duckles Date: Thu, 15 Mar 2012 22:23:08 -0500 Subject: [PATCH 12/16] More fixes to iter_svn_log_entries ancestry-handling * svn2svn/svnclient.py (iter_svn_log_entries): Correctly use and respect ancestors() array: look for the next copyfrom_rev, and once we crawl past the last copy-from then start at the final path+revision. * svn2svn/run/svn2svn.py (process_svn_log_entry): For action='R' don't run "svn remove" command if path_offset="", i.e. don't try to remove the root of the WC. --- svn2svn/run/svn2svn.py | 2 +- svn2svn/svnclient.py | 36 ++++++++++++++++++++++-------------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/svn2svn/run/svn2svn.py b/svn2svn/run/svn2svn.py index 2259351..54e7368 100644 --- a/svn2svn/run/svn2svn.py +++ b/svn2svn/run/svn2svn.py @@ -573,7 +573,7 @@ def process_svn_log_entry(log_entry, ancestors, commit_paths, prefix = ""): # then we need to run the "svn rm" first, then change action='A'. This # lets the normal code below handle re-"svn add"'ing the files. This # should replicate the "replace". - if in_svn(path_offset): + if path_offset and in_svn(path_offset): # Target path might not be under version-control yet, e.g. 
parent "add" # was a copy-from a branch which had no ancestry back to trunk, and each # child folder under that parent folder is a "replace" action on the final diff --git a/svn2svn/svnclient.py b/svn2svn/svnclient.py index 9faef8e..efb3212 100644 --- a/svn2svn/svnclient.py +++ b/svn2svn/svnclient.py @@ -311,40 +311,48 @@ def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_c """ info = get_svn_info(svn_url) svn_repos_url = info['repos_url'] + #print "iter_svn_log_entries: %s %s:%s" % (svn_url, first_rev, last_rev) if last_rev == "HEAD": last_rev = info['revision'] - if first_rev == "1": + if int(first_rev) == 1: start_log = get_first_svn_log_entry(svn_url, first_rev, last_rev, stop_on_copy=stop_on_copy, get_changed_paths=False) if start_log['revision'] > first_rev: first_rev = start_log['revision'] - #print "first_rev: %s" % first_rev + #print "first_rev: %s" % first_rev cur_url = svn_url cur_rev = first_rev cur_anc_idx = None cur_anc_end_rev = None if ancestors: #print ancestors - for idx in range(len(ancestors)-1, 0, -1): - if int(ancestors[idx]['revision']) > first_rev: - #print "Match ancestors[%s]: %s" % (idx, ancestors[idx]) - cur_url = svn_repos_url+ancestors[idx]['copyfrom_path'] - cur_anc_end_rev = ancestors[idx]['copyfrom_rev'] - cur_anc_idx = idx + # Crawl ancestry, from oldest to newest + for idx in range(len(ancestors)-1, -1, -1): # [n-1,...,0] + #print "(pre) Match ancestors[%s]: %s" % (idx, ancestors[idx]) + cur_url = svn_repos_url+ancestors[idx]['copyfrom_path'] + cur_anc_idx = idx + if first_rev < int(ancestors[idx]['copyfrom_rev']): + cur_anc_end_rev = int(ancestors[idx]['copyfrom_rev']) break + if cur_anc_end_rev is None: + #print "(pre) Match ancestors[0] (final): %s" % (ancestors[0]) + cur_anc_idx = -1 + cur_url = svn_repos_url+ancestors[0]['path'] chunk_length = log_min_chunk_length while cur_rev <= last_rev: - #print "cur_rev:%s cur_anc_end_rev:%s cur_anc_idx:%s" % (cur_rev, str(cur_anc_end_rev), cur_anc_idx) + 
#print "cur_rev:%s cur_anc_end_rev:%s cur_anc_idx:%s %s" % (cur_rev, str(cur_anc_end_rev), cur_anc_idx, cur_url) if cur_anc_end_rev and cur_rev >= cur_anc_end_rev: - cur_rev = ancestors[cur_anc_idx]['revision'] + cur_rev = int(ancestors[cur_anc_idx]['revision']) cur_anc_idx -= 1 if cur_anc_idx >= 0: idx = cur_anc_idx - #print "Match ancestors[%s]: %s" % (idx, ancestors[idx]) + #print "(loop) Match ancestors[%s]: %s" % (idx, ancestors[idx]) cur_url = svn_repos_url+ancestors[idx]['copyfrom_path'] - cur_anc_end_rev = ancestors[idx]['copyfrom_rev'] + cur_anc_end_rev = int(ancestors[idx]['copyfrom_rev']) else: + #print "(loop) Match ancestors[0] (final): %s" % (ancestors[0]) + cur_url = svn_repos_url+ancestors[0]['path'] cur_anc_end_rev = None - #print "cur_rev:%s cur_anc_end_rev:%s cur_anc_idx:%s" % (cur_rev, str(cur_anc_end_rev), cur_anc_idx) + #print "cur_rev:%s cur_anc_end_rev:%s cur_anc_idx:%s %s" % (cur_rev, str(cur_anc_end_rev), cur_anc_idx, cur_url) start_t = time.time() stop_rev = min(last_rev, cur_rev + chunk_length) stop_rev = min(stop_rev, cur_anc_end_rev) if cur_anc_end_rev else stop_rev @@ -361,7 +369,7 @@ def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_c yield e if e['revision'] >= last_rev: break - cur_rev = e['revision']+1 + cur_rev = int(e['revision'])+1 else: cur_rev = int(stop_rev)+1 # Adapt chunk length based on measured request duration -- 2.45.2 From 77f6ec1edad1a51c216eb8049e885fd0ff671290 Mon Sep 17 00:00:00 2001 From: Tony Duckles Date: Sat, 24 Mar 2012 13:45:29 -0500 Subject: [PATCH 13/16] Verify-mode Add command-line args for verifying content and ancestry, i.e. for self-testing the replay. Compare the source vs. target history to make sure we ended-up with equivalent target revisions for each applicable source revision. * svn2svn/run/svn2svn.py (verify_commit): Adding. * tests/make-ref-repo.sh: Add tests for source revisions which we'll never have an equivalent target revision for, e.g. 
svn:mergeinfo property changes. --- svn2svn/__init__.py | 2 +- svn2svn/run/svn2svn.py | 246 ++++++++++++++++++++++++++++++++++++++++- tests/make-ref-repo.sh | 3 + 3 files changed, 246 insertions(+), 5 deletions(-) diff --git a/svn2svn/__init__.py b/svn2svn/__init__.py index bdca953..d0345a8 100644 --- a/svn2svn/__init__.py +++ b/svn2svn/__init__.py @@ -2,7 +2,7 @@ __all__ = [] __author__ = 'Tony Duckles' __license__ = 'GNU General Public License (version 3 or later)' -__versioninfo__ = (1, 2, 0) +__versioninfo__ = (1, 4, 0) base_version = '.'.join(map(str, __versioninfo__)) full_version = base_version diff --git a/svn2svn/run/svn2svn.py b/svn2svn/run/svn2svn.py index 54e7368..cf30f7e 100644 --- a/svn2svn/run/svn2svn.py +++ b/svn2svn/run/svn2svn.py @@ -5,7 +5,7 @@ Replicate (replay) changesets from one SVN repository to another. from .. import base_version, full_version from .. import ui from .. import svnclient -from ..shell import run_svn +from ..shell import run_svn,run_shell_command from ..errors import (ExternalCommandFailed, UnsupportedSVNAction, InternalError, VerificationError) from parse import HelpFormatter @@ -27,6 +27,8 @@ source_repos_url = "" # URL to root of source SVN repo, e.g. 'http://s source_base = "" # Relative path of source_url in source SVN repo, e.g. '/trunk' source_repos_uuid = "" # UUID of source SVN repo target_url ="" # URL to target path in target SVN repo, e.g. 'file:///svn/repo_target/trunk' +target_repos_url = "" # URL to root of target SVN repo, e.g. 'http://server/svn/target' +target_base = "" # Relative path of target_url in target SVN repo, e.g. 
'/trunk' rev_map = {} # The running mapping-table dictionary for source_url rev #'s -> target_url rev #'s options = None # optparser options @@ -95,6 +97,231 @@ def commit_from_svn_log_entry(log_entry, commit_paths=None, target_revprops=None run_svn(["propset", "--revprop", "-r", rev_num, "svn:date", log_entry['date_raw']]) return rev_num +def verify_commit(source_rev, target_rev, log_entry=None): + """ + Compare the ancestry/content/properties between source_url vs target_url + for a given revision. + """ + # Gather the offsets in the source repo to check + check_paths = [] + remove_paths = [] + # TODO: Need to make this ancestry aware + if options.verify == 1 and log_entry is not None: # Changed only + ui.status("Verifying source revision %s (only-changed)...", source_rev, level=ui.VERBOSE) + for d in log_entry['changed_paths']: + path = d['path'] + if not is_child_path(path, source_base): + continue + if d['kind'] == "": + d['kind'] = svnclient.get_kind(source_repos_url, path, source_rev, d['action'], log_entry['changed_paths']) + assert (d['kind'] == 'file') or (d['kind'] == 'dir') + path_is_dir = True if d['kind'] == 'dir' else False + path_is_file = True if d['kind'] == 'file' else False + path_offset = path[len(source_base):].strip("/") + if d['action'] == 'D': + remove_paths.append(path_offset) + elif not path_offset in check_paths: + ui.status("verify_commit: path [mode=changed]: kind=%s: %s", d['kind'], path, level=ui.DEBUG, color='YELLOW') + if path_is_file: + ui.status(" "+"verify_commit [mode=changed]: check_paths.append('%s')", path_offset, level=ui.DEBUG, color='GREEN') + check_paths.append(path_offset) + if path_is_dir: + if not d['action'] in 'AR': + continue + child_paths = run_svn(["list", "--recursive", "-r", source_rev, source_url.rstrip("/")+"/"+path_offset+"@"+str(source_rev)]) + child_paths = child_paths.strip("\n").split("\n") + for child_path in child_paths: + if not child_path: + continue + # Directories have a trailing slash in the "svn 
list" output + child_path_is_dir = True if child_path[-1] == "/" else False + child_path_offset = child_path.rstrip('/') if child_path_is_dir else child_path + if not child_path_is_dir: + # Only check files + working_path = (path_offset+"/" if path_offset else "") + child_path_offset + if not working_path in check_paths: + ui.status(" "+"verify_commit [mode=changed]: check_paths.append('%s'+'/'+'%s')", path_offset, child_path_offset, level=ui.DEBUG, color='GREEN') + check_paths.append(working_path) + if options.verify == 2: # All paths + ui.status("Verifying source revision %s (all)...", source_rev, level=ui.VERBOSE) + child_paths = run_svn(["list", "--recursive", "-r", source_rev, source_url+"@"+str(source_rev)]) + child_paths = child_paths.strip("\n").split("\n") + for child_path in child_paths: + if not child_path: + continue + # Directories have a trailing slash in the "svn list" output + child_path_is_dir = True if child_path[-1] == "/" else False + child_path_offset = child_path.rstrip('/') if child_path_is_dir else child_path + if not child_path_is_dir: + # Only check files + ui.status("verify_commit [mode=all]: check_paths.append('%s')", child_path_offset, level=ui.DEBUG, color='GREEN') + check_paths.append(child_path_offset) + + # If there were any paths deleted in the last revision (options.verify=1 mode), + # check that they were correctly deleted. + if remove_paths: + count_total = len(remove_paths) + count = 0 + for path_offset in remove_paths: + count += 1 + if in_svn(path_offset): + ui.status(" (%s/%s) Verify path: FAIL: %s", str(count).rjust(len(str(count_total))), count_total, path_offset, level=ui.VERBOSE, color='RED') + raise VerificationError("Path removed in source rev r%s, but still exists in target WC: %s" % (source_rev, path_offset)) + ui.status(" (%s/%s) Verify remove: OK: %s", str(count).rjust(len(str(count_total))), count_total, path_offset, level=ui.VERBOSE) + + # Compare each of the check_path entries between source vs. 
target + if check_paths: + source_rev_first = int(min(rev_map, key=rev_map.get)) or 1 # The first source_rev we replayed into target + ui.status("verify_commit: source_rev_first:%s", source_rev_first, level=ui.DEBUG, color='YELLOW') + count_total = len(check_paths) + count = 0 + for path_offset in check_paths: + count += 1 + ui.status("verify_commit: path_offset:%s", path_offset, level=ui.DEBUG, color='YELLOW') + source_log_entries = svnclient.run_svn_log(source_url.rstrip("/")+"/"+path_offset+"@"+str(source_rev), source_rev, 1, source_rev-source_rev_first+1) + target_log_entries = svnclient.run_svn_log(target_url.rstrip("/")+"/"+path_offset+"@"+str(target_rev), target_rev, 1, target_rev) + # Build a list of commits in source_log_entries which matches our + # target path_offset. + working_path = source_base+"/"+path_offset + source_revs = [] + for log_entry in source_log_entries: + source_rev_tmp = log_entry['revision'] + if source_rev_tmp < source_rev_first: + # Only process source revisions which have been replayed into target + break + #ui.status(" [verify_commit] source_rev_tmp:%s, working_path:%s\n%s", source_rev_tmp, working_path, pp.pformat(log_entry), level=ui.DEBUG, color='MAGENTA') + changed_paths_temp = [] + for d in log_entry['changed_paths']: + path = d['path'] + # Match working_path or any parents + if is_child_path(working_path, path): + ui.status(" verify_commit: changed_path: %s %s@%s (parent:%s)", d['action'], path, source_rev_tmp, working_path, level=ui.DEBUG, color='YELLOW') + changed_paths_temp.append({'path': path, 'data': d}) + assert changed_paths_temp + # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path. + changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True) + # Find the action for our working_path in this revision. Use a loop to check in reverse order, + # so that if the target file/folder is "M" but has a parent folder with an "A" copy-from. 
+ working_path_next = working_path + match_d = {} + for v in changed_paths: + d = v['data'] + if not match_d: + match_d = d + path = d['path'] + if d['action'] not in _valid_svn_actions: + raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!" + % (log_entry['revision'], d['action'])) + if d['action'] in 'AR' and d['copyfrom_revision']: + # If we found a copy-from action for a parent path, adjust our + # working_path to follow the rename/copy-from, just like find_svn_ancestors(). + working_path_next = working_path.replace(d['path'], d['copyfrom_path']) + match_d = d + break + if is_child_path(working_path, source_base): + # Only add source_rev's where the path changed in this revision was a child + # of source_base, so that we silently ignore any history that happened on + # non-source_base paths (e.g. ignore branch history if we're only replaying trunk). + is_diff = False + d = match_d + if d['action'] == 'M': + # For action="M", we need to throw out cases where the only change was to + # a property which we ignore, e.g. "svn:mergeinfo". 
+ if d['kind'] == "": + d['kind'] = svnclient.get_kind(source_repos_url, working_path, log_entry['revision'], d['action'], log_entry['changed_paths']) + assert (d['kind'] == 'file') or (d['kind'] == 'dir') + if d['kind'] == 'file': + # Check for file-content changes + # TODO: This should be made ancestor-aware, since the file won't always be at the same path in rev-1 + sum1 = run_shell_command("svn cat -r %s '%s' | md5sum" % (source_rev_tmp, source_repos_url+working_path+"@"+str(source_rev_tmp))) + sum2 = run_shell_command("svn cat -r %s '%s' | md5sum" % (source_rev_tmp-1, source_repos_url+working_path_next+"@"+str(source_rev_tmp-1))) + is_diff = True if sum1 <> sum2 else False + if not is_diff: + # Check for property changes + props1 = svnclient.get_all_props(source_repos_url+working_path, source_rev_tmp) + props2 = svnclient.get_all_props(source_repos_url+working_path_next, source_rev_tmp-1) + # Ignore changes to "svn:mergeinfo", since we don't copy that + if 'svn:mergeinfo' in props1: del props1['svn:mergeinfo'] + if 'svn:mergeinfo' in props2: del props2['svn:mergeinfo'] + for prop in props1: + if prop not in props2 or \ + props1[prop] != props2[prop]: + is_diff = True + break + for prop in props2: + if prop not in props1 or \ + props1[prop] != props2[prop]: + is_diff = True + break + if not is_diff: + ui.status(" verify_commit: skip %s@%s", working_path, source_rev_tmp, level=ui.DEBUG, color='GREEN_B', bold=True) + else: + is_diff = True + if is_diff: + ui.status(" verify_commit: source_revs.append(%s), working_path:%s", source_rev_tmp, working_path, level=ui.DEBUG, color='GREEN_B') + source_revs.append({'path': working_path, 'revision': source_rev_tmp}) + working_path = working_path_next + # Build a list of all the target commits "svn log" returned + target_revs = [] + target_revs_rmndr = [] + for log_entry in target_log_entries: + target_rev_tmp = log_entry['revision'] + ui.status(" verify_commit: target_revs.append(%s)", target_rev_tmp, level=ui.DEBUG, 
color='GREEN_B') + target_revs.append(target_rev_tmp) + target_revs_rmndr.append(target_rev_tmp) + # Compare the two lists + for d in source_revs: + working_path = d['path'] + source_rev_tmp = d['revision'] + target_rev_tmp = get_rev_map(source_rev_tmp, " ") + working_offset = working_path[len(source_base):].strip("/") + sum1 = run_shell_command("svn cat -r %s '%s' | md5sum" % (source_rev_tmp, source_repos_url+working_path+"@"+str(source_rev_tmp))) + sum2 = run_shell_command("svn cat -r %s '%s' | md5sum" % (target_rev_tmp, target_url+"/"+working_offset+"@"+str(target_rev_tmp))) if target_rev_tmp is not None else "" + #print "source@%s: %s" % (str(source_rev_tmp).ljust(6), sum1) + #print "target@%s: %s" % (str(target_rev_tmp).ljust(6), sum2) + ui.status(" verify_commit: %s: source=%s target=%s", working_offset, source_rev_tmp, target_rev_tmp, level=ui.DEBUG, color='GREEN') + if not target_rev_tmp: + ui.status(" (%s/%s) Verify path: FAIL: %s", str(count).rjust(len(str(count_total))), count_total, path_offset, level=ui.VERBOSE, color='RED') + raise VerificationError("Unable to find corresponding target_rev for source_rev r%s in rev_map (path_offset='%s')" % (source_rev_tmp, path_offset)) + if target_rev_tmp not in target_revs: + # If found a source_rev with no equivalent target_rev in target_revs, + # check if the only difference in source_rev vs. source_rev-1 is the + # removal/addition of a trailing newline char, since this seems to get + # stripped-out sometimes during the replay (via "svn export"?). 
+ # Strip any trailing \r\n from file-content (http://stackoverflow.com/a/1656218/346778) + sum1 = run_shell_command("svn cat -r %s '%s' | perl -i -p0777we's/\\r\\n\z//' | md5sum" % (source_rev_tmp, source_repos_url+working_path+"@"+str(source_rev_tmp))) + sum2 = run_shell_command("svn cat -r %s '%s' | perl -i -p0777we's/\\r\\n\z//' | md5sum" % (source_rev_tmp-1, source_repos_url+working_path+"@"+str(source_rev_tmp-1))) + if sum1 <> sum2: + ui.status(" (%s/%s) Verify path: FAIL: %s", str(count).rjust(len(str(count_total))), count_total, path_offset, level=ui.VERBOSE, color='RED') + raise VerificationError("Found source_rev (r%s) with no corresponding target_rev: path_offset='%s'" % (source_rev_tmp, path_offset)) + target_revs_rmndr.remove(target_rev_tmp) + if target_revs_rmndr: + rmndr_list = ", ".join(map(str, target_revs_rmndr)) + ui.status(" (%s/%s) Verify path: FAIL: %s", str(count).rjust(len(str(count_total))), count_total, path_offset, level=ui.VERBOSE, color='RED') + raise VerificationError("Found one or more *extra* target_revs: path_offset='%s', target_revs='%s'" % (path_offset, rmndr_list)) + ui.status(" (%s/%s) Verify path: OK: %s", str(count).rjust(len(str(count_total))), count_total, path_offset, level=ui.VERBOSE) + + # Ensure there are no "extra" files in the target side + if options.verify == 2: + target_paths = [] + child_paths = run_svn(["list", "--recursive", "-r", target_rev, target_url+"@"+str(target_rev)]) + child_paths = child_paths.strip("\n").split("\n") + for child_path in child_paths: + if not child_path: + continue + # Directories have a trailing slash in the "svn list" output + child_path_is_dir = True if child_path[-1] == "/" else False + child_path_offset = child_path.rstrip('/') if child_path_is_dir else child_path + if not child_path_is_dir: + target_paths.append(child_path_offset) + # Compare + for path_offset in target_paths: + if not path_offset in check_paths: + raise VerificationError("Path exists in target (@%s) but not source 
(@%s): %s" % (target_rev, source_rev, path_offset)) + for path_offset in check_paths: + if not path_offset in target_paths: + raise VerificationError("Path exists in source (@%s) but not target (@%s): %s" % (source_rev, target_rev, path_offset)) + def full_svn_revert(): """ Do an "svn revert" and proactively remove any extra files in the working copy. @@ -718,8 +945,9 @@ def real_main(args, parser): source_repos_url = source_info['repos_url'] # e.g. 'http://server/svn/source' source_base = source_url[len(source_repos_url):] # e.g. '/trunk' source_repos_uuid = source_info['repos_uuid'] - global target_repos_url - target_repos_url = target_info['repos_url'] + global target_repos_url,target_base + target_repos_url = target_info['repos_url'] # e.g. 'http://server/svn/target' + target_base = target_url[len(target_repos_url):] # e.g. '/trunk' # Init start and end revision try: @@ -824,6 +1052,8 @@ def real_main(args, parser): set_rev_map(source_start_rev, target_rev) commit_count += 1 target_rev_last = target_rev + if options.verify: + verify_commit(source_rev, target_rev_last) else: # Re-build the rev_map based on any already-replayed history in target_url build_rev_map(target_url, target_rev_last, source_info) @@ -832,7 +1062,7 @@ def real_main(args, parser): source_start_rev = int(max(rev_map, key=rev_map.get)) assert source_start_rev ui.status("Continuing from source revision %s.", source_start_rev, level=ui.VERBOSE) - ui.status("") + ui.status("", level=ui.VERBOSE) if options.keep_revnum and source_start_rev < target_rev_last: parser.error("last target revision is equal-or-higher than starting source revision; " @@ -874,6 +1104,8 @@ def real_main(args, parser): set_rev_map(source_rev, target_rev) target_rev_last = target_rev commit_count += 1 + if options.verify: + verify_commit(source_rev, target_rev_last, log_entry) # Run "svn cleanup" every 100 commits if SVN 1.7+, to clean-up orphaned ".svn/pristines/*" if svn_vers >= 1.7 and (commit_count % 100 == 0): 
run_svn(["cleanup"]) @@ -881,6 +1113,8 @@ def real_main(args, parser): # If there were no new source_url revisions to process, init source_rev # for the "finally" message below to be the last source revision replayed. source_rev = source_start_rev + if options.verify: + verify_commit(source_start_rev, target_rev_last) except KeyboardInterrupt: print "\nStopped by user." @@ -965,6 +1199,10 @@ Examples: parser.add_option("-n", "--dry-run", action="store_true", dest="dry_run", default=False, help="process next source revision but don't commit changes to " "target working-copy (forces --limit=1)") + parser.add_option("-x", "--verify", action="store_const", const=1, dest="verify", + help="verify ancestry and content for changed paths in commit after every target commit or last target commit") + parser.add_option("-X", "--verify-all", action="store_const", const=2, dest="verify", + help="verify ancestry and content for entire target_url tree after every target commit or last target commit") parser.add_option("--debug", dest="verbosity", const=ui.DEBUG, action="store_const", help="enable debugging output (same as -vvv)") global options diff --git a/tests/make-ref-repo.sh b/tests/make-ref-repo.sh index 627791a..7a1fd89 100755 --- a/tests/make-ref-repo.sh +++ b/tests/make-ref-repo.sh @@ -86,12 +86,15 @@ svn merge -q $BRANCH svn_commit "Test 2: Rename Module/ProjectA/FileA2.txt -> Module/ProjectB/FileB3.txt" # Test #3: Verify rename +# * Test svn:mergeinfo-only change BRANCH="$REPOURL/branches/test3" svn copy -q -m "Create branch" $TRUNK $BRANCH svn switch -q $BRANCH show_last_commit echo "Module/ProjectB/FileB2.txt (Test 3)" >> $WC/Module/ProjectB/FileB2.txt svn propset -q filename FileB2.txt $WC/Module/ProjectB/FileB2.txt +svn propset -q 'svn:mergeinfo' '/branches/foo/Module/ProjectB/FileB1.txt:5' $WC/Module/ProjectB/FileB1.txt +svn propset -q 'svn:mergeinfo' '/branches/foo/Module/ProjectB/FileB2.txt:5' $WC/Module/ProjectB/FileB2.txt svn_commit "Test 3: Verify 
Module/ProjectB/FileB2.txt" svn switch -q $TRUNK svn merge -q $BRANCH -- 2.45.2 From cb97307237c6431196ed7760ac70d9db23f0ddb7 Mon Sep 17 00:00:00 2001 From: Tony Duckles Date: Sat, 24 Mar 2012 14:31:26 -0500 Subject: [PATCH 14/16] Prevent KeyboardInterrupt's during SVN commit * svn2svn/run/breakhandler.py: Adding * svn2svn/run/svn2svn.py (commit_from_svn_log_entry): Use BreakHandler to ensure that "svn commit" and post-commit rev-prop updating happen as an atomic unit. --- svn2svn/run/breakhandler.py | 111 ++++++++++++++++++++++++++++++++++++ svn2svn/run/svn2svn.py | 14 ++++- 2 files changed, 122 insertions(+), 3 deletions(-) create mode 100644 svn2svn/run/breakhandler.py diff --git a/svn2svn/run/breakhandler.py b/svn2svn/run/breakhandler.py new file mode 100644 index 0000000..bb028ae --- /dev/null +++ b/svn2svn/run/breakhandler.py @@ -0,0 +1,111 @@ +''' +Trap keyboard interrupts. No rights reserved; use at your own risk. + +@author: Stacy Prowell (http://stacyprowell.com) +@url: http://stacyprowell.com/blog/2009/03/30/trapping-ctrlc-in-python/ +''' +import signal + +class BreakHandler: + ''' + Trap CTRL-C, set a flag, and keep going. This is very useful for + gracefully exiting database loops while simulating transactions. + + To use this, make an instance and then enable it. You can check + whether a break was trapped using the trapped property. + + # Create and enable a break handler. + ih = BreakHandler() + ih.enable() + for x in big_set: + complex_operation_1() + complex_operation_2() + complex_operation_3() + # Check whether there was a break. + if ih.trapped: + # Stop the loop. + break + ih.disable() + # Back to usual operation... + ''' + + def __init__(self, emphatic=9): + ''' + Create a new break handler. + + @param emphatic: This is the number of times that the user must + press break to *disable* the handler. 
If you press + break this number of times, the handler is automagically + disabled, and one more break will trigger an old + style keyboard interrupt. The default is nine. This + is a Good Idea, since if you happen to lose your + connection to the handler you can *still* disable it. + ''' + self._count = 0 + self._enabled = False + self._emphatic = emphatic + self._oldhandler = None + return + + def _reset(self): + ''' + Reset the trapped status and count. You should not need to use this + directly; instead you can disable the handler and then re-enable it. + This is better, in case someone presses CTRL-C during this operation. + ''' + self._count = 0 + return + + def enable(self): + ''' + Enable trapping of the break. This action also resets the + handler count and trapped properties. + ''' + if not self._enabled: + self._reset() + self._enabled = True + self._oldhandler = signal.signal(signal.SIGINT, self) + return + + def disable(self): + ''' + Disable trapping the break. You can check whether a break + was trapped using the count and trapped properties. + ''' + if self._enabled: + self._enabled = False + signal.signal(signal.SIGINT, self._oldhandler) + self._oldhandler = None + return + + def __call__(self, signame, sf): + ''' + An break just occurred. Save information about it and keep + going. + ''' + self._count += 1 + # If we've exceeded the "emphatic" count disable this handler. + if self._count >= self._emphatic: + self.disable() + return + + def __del__(self): + ''' + Python is reclaiming this object, so make sure we are disabled. + ''' + self.disable() + return + + @property + def count(self): + ''' + The number of breaks trapped. + ''' + return self._count + + @property + def trapped(self): + ''' + Whether a break was trapped. + ''' + return self._count > 0 diff --git a/svn2svn/run/svn2svn.py b/svn2svn/run/svn2svn.py index cf30f7e..a284f29 100644 --- a/svn2svn/run/svn2svn.py +++ b/svn2svn/run/svn2svn.py @@ -8,6 +8,7 @@ from .. 
import svnclient from ..shell import run_svn,run_shell_command from ..errors import (ExternalCommandFailed, UnsupportedSVNAction, InternalError, VerificationError) from parse import HelpFormatter +from breakhandler import BreakHandler import sys import os @@ -88,6 +89,12 @@ def commit_from_svn_log_entry(log_entry, commit_paths=None, target_revprops=None args += list(commit_paths) rev_num = None if not options.dry_run: + # Use BreakHandler class to temporarily redirect SIGINT handler, so that + # "svn commit" + post-commit rev-prop updating is a quasi-atomic unit. + # If user presses Ctrl-C during this, wait until after this full action + # has finished before raising the KeyboardInterrupt exception. + bh = BreakHandler() + bh.enable() # Run the "svn commit" command, and screen-scrape the target_rev value (if any) output = run_svn(args) rev_num = parse_svn_commit_rev(output) if output else None @@ -95,6 +102,10 @@ def commit_from_svn_log_entry(log_entry, commit_paths=None, target_revprops=None ui.status("Committed revision %s.", rev_num) if options.keep_date: run_svn(["propset", "--revprop", "-r", rev_num, "svn:date", log_entry['date_raw']]) + bh.disable() + # Check if the user tried to press Ctrl-C + if bh.trapped: + raise KeyboardInterrupt return rev_num def verify_commit(source_rev, target_rev, log_entry=None): @@ -1076,9 +1087,6 @@ def real_main(args, parser): it_log_entries = svnclient.iter_svn_log_entries(source_url, source_start_rev+1, source_end_rev, get_revprops=True, ancestors=source_ancestors) if source_start_rev < source_end_rev else [] source_rev = None - # TODO: Now that commit_from_svn_log_entry() might try to do a "svn propset svn:date", - # we might want some better KeyboardInterupt handilng here, to ensure that - # commit_from_svn_log_entry() always runs as an atomic unit. 
try: for log_entry in it_log_entries: if options.entries_proc_limit: -- 2.45.2 From a07da8a4e351226a04df300eac81c84eb8ee360b Mon Sep 17 00:00:00 2001 From: Tony Duckles Date: Sat, 24 Mar 2012 15:05:33 -0500 Subject: [PATCH 15/16] For options.keep_author, update via post-commit rev-prop Rather than using "svn commit --username %author%", mirror the source author info by setting the "svn:author" revprop post-commit the same as we do for "svn:date" (options.keep_date). This frees us up to be able to use --username in the future for legitimate source/target repo auth. * svn2svn/run/svn2svn.py (commit_from_svn_log_entry): Update "svn:author" via post-commit revprop rather than --username. * README.mkd: Reflect new "svn:author" handling. --- README.mkd | 10 ++++------ svn2svn/run/svn2svn.py | 6 +++--- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/README.mkd b/README.mkd index 3ad35fe..3395dc7 100644 --- a/README.mkd +++ b/README.mkd @@ -8,12 +8,10 @@ Features an empty target repo or simply an empty folder/branch in the target repo. - **Maintains logical history (when possible)**, e.g. uses "svn copy" for renames. - **Maintains original commit messages**. -- **Optionally maintain source commit authors (`svn:author`)**. Requires non-authenticated - access to the target repo, since this relies upon the `--username` SVN - command-line arg. -- **Optionally maintain source commit timestamps (`svn:date`)**. Requires a - "pre-revprop-change" hook script in the target repo, to be able to change - the "`svn:date`" revprops after target commits have been made. +- **Optionally maintain source commit authors (`svn:author`) and commit timestamps + (`svn:date`)**. Requires a "pre-revprop-change" hook script in the target + repo, to be able to change the "`svn:author`" and "`svn:date`" revprops after + target commits have been made. - **Optionally maintain identical revision #'s between source vs. target repo**. 
Effectively requires that you're replaying into an empty target repo, or rather that the first source repo revision to be replayed is less than diff --git a/svn2svn/run/svn2svn.py b/svn2svn/run/svn2svn.py index a284f29..db0b0b0 100644 --- a/svn2svn/run/svn2svn.py +++ b/svn2svn/run/svn2svn.py @@ -66,8 +66,6 @@ def commit_from_svn_log_entry(log_entry, commit_paths=None, target_revprops=None message += "\nDate: " + svn_date if options.log_author: message += "\nAuthor: " + log_entry['author'] - if options.keep_author: - args += ["--username", log_entry['author']] args += ["-m", message] revprops = {} if log_entry['revprops']: @@ -102,6 +100,8 @@ def commit_from_svn_log_entry(log_entry, commit_paths=None, target_revprops=None ui.status("Committed revision %s.", rev_num) if options.keep_date: run_svn(["propset", "--revprop", "-r", rev_num, "svn:date", log_entry['date_raw']]) + if options.keep_author: + run_svn(["propset", "--revprop", "-r", rev_num, "svn:author", log_entry['author']]) bh.disable() # Check if the user tried to press Ctrl-C if bh.trapped: @@ -1180,7 +1180,7 @@ Examples: "maintain same commit author, same commit time, and file/dir properties") parser.add_option("-U", "--keep-author", action="store_true", dest="keep_author", default=False, help="maintain same commit authors (svn:author) as source\n" - "(REQUIRES target_url be non-auth'd, e.g. 
file://-based, since this uses --username to set author)") + "(REQUIRES 'pre-revprop-change' hook script to allow 'svn:author' changes)") parser.add_option("-D", "--keep-date", action="store_true", dest="keep_date", default=False, help="maintain same commit time (svn:date) as source\n" "(REQUIRES 'pre-revprop-change' hook script to allow 'svn:date' changes)") -- 2.45.2 From 120c38e28b57dc8a86ce081cb06caab589339259 Mon Sep 17 00:00:00 2001 From: Tony Duckles Date: Sat, 24 Mar 2012 15:15:19 -0500 Subject: [PATCH 16/16] Be more quiet by default * svn2svn/run/svn2svn.py (disp_svn_log_summary, real_main): Only display source rev info for --verbose mode. * svn2svn/run/svn2svn.py (commit_from_svn_log_entry): Move "Committed revision %s." message down after the post-commit revprops have been updated, since the commit+revprops are an atomic unit. --- svn2svn/run/svn2svn.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/svn2svn/run/svn2svn.py b/svn2svn/run/svn2svn.py index db0b0b0..1e76103 100644 --- a/svn2svn/run/svn2svn.py +++ b/svn2svn/run/svn2svn.py @@ -97,11 +97,11 @@ def commit_from_svn_log_entry(log_entry, commit_paths=None, target_revprops=None output = run_svn(args) rev_num = parse_svn_commit_rev(output) if output else None if rev_num is not None: - ui.status("Committed revision %s.", rev_num) if options.keep_date: run_svn(["propset", "--revprop", "-r", rev_num, "svn:date", log_entry['date_raw']]) if options.keep_author: run_svn(["propset", "--revprop", "-r", rev_num, "svn:author", log_entry['author']]) + ui.status("Committed revision %s.", rev_num) bh.disable() # Check if the user tried to press Ctrl-C if bh.trapped: @@ -932,12 +932,12 @@ def keep_revnum(source_rev, target_rev_last, wc_target_tmp): return target_rev_last def disp_svn_log_summary(log_entry): - ui.status("------------------------------------------------------------------------") + ui.status("------------------------------------------------------------------------", 
level=ui.VERBOSE) ui.status("r%s | %s | %s", log_entry['revision'], log_entry['author'], - str(datetime.fromtimestamp(int(log_entry['date'])).isoformat(' '))) - ui.status(log_entry['message']) + str(datetime.fromtimestamp(int(log_entry['date'])).isoformat(' ')), level=ui.VERBOSE) + ui.status(log_entry['message'], level=ui.VERBOSE) def real_main(args, parser): global source_url, target_url, rev_map @@ -1016,7 +1016,7 @@ def real_main(args, parser): # This is the revision we will start from for source_url source_start_rev = int(source_start_log['revision']) ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE) - ui.status("") + ui.status("", level=ui.VERBOSE) if options.keep_revnum and source_rev > target_rev_last: target_rev_last = keep_revnum(source_rev, target_rev_last, wc_target_tmp) -- 2.45.2