]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/run/svn2svn.py
Correctly handle source_url/target_url with chars needing URL-encoding
[svn2svn.git] / svn2svn / run / svn2svn.py
1 """
2 Replicate (replay) changesets from one SVN repository to another.
3 """
4
5 from .. import base_version, full_version
6 from .. import ui
7 from .. import shell
8 from .. import svnclient
9 from ..shell import run_svn,run_shell_command
10 from ..errors import (ExternalCommandFailed, UnsupportedSVNAction, InternalError, VerificationError)
11 from parse import HelpFormatter
12 from breakhandler import BreakHandler
13
14 import sys
15 import os
16 import traceback
17 import operator
18 import optparse
19 import re
20 import urllib
21 from datetime import datetime
22
# The known SVN action abbreviations, as reported by "svn log"
_valid_svn_actions = "MARD"

# Module-level variables/parameters

# URL to source path in source SVN repo, e.g. 'http://server/svn/source/trunk'
source_url = ""
# URL to root of source SVN repo, e.g. 'http://server/svn/source'
source_repos_url = ""
# Relative path of source_url in source SVN repo, e.g. '/trunk'
source_base = ""
# UUID of source SVN repo
source_repos_uuid = ""
# URL to target path in target SVN repo, e.g. 'file:///svn/repo_target/trunk'
target_url = ""
# URL to root of target SVN repo, e.g. 'http://server/svn/target'
target_repos_url = ""
# Relative path of target_url in target SVN repo, e.g. '/trunk'
target_base = ""
# The running mapping-table dictionary for source_url rev #'s -> target_url rev #'s
rev_map = {}
# optparser options
options = None
35
def parse_svn_commit_rev(output):
    """
    Extract the new revision number from the text printed by "svn commit".

    Scans the output line-by-line for the first line beginning with
    'Committed revision ' and returns the number that follows it (with any
    trailing '.' removed) as an int. Asserts that such a line was found.
    """
    marker = 'Committed revision '
    rev_num = None
    for text in output.strip("\n").split("\n"):
        if text.startswith(marker):
            # Everything after the marker is the revision, e.g. "42."
            rev_num = text[len(marker):].rstrip('.')
            break
    assert rev_num is not None
    return int(rev_num)
48
def commit_from_svn_log_entry(log_entry, commit_paths=None, target_revprops=None):
    """
    Given an SVN log entry and an optional list of changed paths, do an svn commit.

    'log_entry' is the source log entry; this function reads its 'date',
    'message', 'author', 'revprops', 'date_raw' and 'revision' keys.
    'commit_paths' is an optional list of working-copy paths to pass to
    "svn commit". It is only passed along when it has fewer than 100 entries;
    otherwise the commit runs with no explicit paths.
    'target_revprops' is an optional list of {'name':..., 'value':...} dicts of
    extra revprops to set on the commit; these override same-named revprops
    carried forward from the source revision.

    Returns the committed revision number, or None when running in dry-run
    mode or when "svn commit" produced no output.
    Raises KeyboardInterrupt (after the commit and revprop updates are done)
    if Ctrl-C was pressed while the commit was in progress.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # Display the _wc_target "svn status" info if running in -vv (or higher) mode
    if ui.get_level() >= ui.EXTRA:
        ui.status(">> commit_from_svn_log_entry: Pre-commit _wc_target status:", level=ui.EXTRA, color='CYAN')
        ui.status(run_svn(["status"]), level=ui.EXTRA, color='CYAN')
    # This will use the local timezone for displaying commit times
    timestamp = int(log_entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    # Use this one instead if you prefer UTC commit times:
    #svn_date = "%d 0" % timestamp
    args = ["commit", "--force-log"]
    message = log_entry['message']
    if options.log_date:
        message += "\nDate: " + svn_date
    if options.log_author:
        message += "\nAuthor: " + log_entry['author']
    args += ["-m", message]
    revprops = {}
    if log_entry['revprops']:
        # Carry forward any revprop's from the source revision
        for v in log_entry['revprops']:
            revprops[v['name']] = v['value']
    if target_revprops:
        # Add any extra revprop's we want to set for the target repo commits
        for v in target_revprops:
            revprops[v['name']] = v['value']
    for key in revprops:
        args += ["--with-revprop", "%s=%s" % (key, str(revprops[key]))]
    if commit_paths and len(commit_paths) < 100:
        # If we don't have an excessive amount of individual changed paths, pass
        # those to the "svn commit" command. Else, pass nothing so we commit at
        # the root of the working-copy.
        for c_path in commit_paths:
            args += [svnclient.safe_path(c_path)]
    rev_num = None
    if not options.dry_run:
        # Use BreakHandler class to temporarily redirect SIGINT handler, so that
        # "svn commit" + post-commit rev-prop updating is a quasi-atomic unit.
        # If user presses Ctrl-C during this, wait until after this full action
        # has finished raising the KeyboardInterrupt exception.
        bh = BreakHandler()
        bh.enable()
        try:
            # Run the "svn commit" command, and screen-scrape the target_rev value (if any)
            output = run_svn(args)
            rev_num = parse_svn_commit_rev(output) if output else None
            if rev_num is not None:
                if options.keep_date:
                    run_svn(["propset", "--revprop", "-r", rev_num, "svn:date", log_entry['date_raw']])
                if options.keep_author:
                    run_svn(["propset", "--revprop", "-r", rev_num, "svn:author", log_entry['author']])
                ui.status("Committed revision %s (source r%s).", rev_num, log_entry['revision'])
        finally:
            # Always restore the original SIGINT handling, even if one of the
            # svn commands above raised; otherwise Ctrl-C would stay trapped.
            bh.disable()
        # Check if the user tried to press Ctrl-C
        if bh.trapped:
            raise KeyboardInterrupt
    return rev_num
111
def verify_commit(source_rev, target_rev, log_entry=None):
    """
    Compare the ancestry/content/properties between source_url vs target_url
    for a given revision.

    'source_rev' is the source-repo revision which was just replayed.
    'target_rev' is the corresponding target-repo revision.
    'log_entry' is the source "svn log" entry for source_rev; it is only
      consulted in "only-changed" mode (options.verify == 1).

    The verification mode comes from the module-level 'options.verify':
      1 = only check paths changed in 'log_entry',
      2 = check every file listed under source_url@source_rev, and also
          cross-check the full file lists of source vs. target.

    Raises VerificationError if one or more mismatches were found, and
    UnsupportedSVNAction if an unknown action code appears in the source log.
    """
    error_cnt = 0
    # Gather the offsets in the source repo to check
    check_paths = []
    remove_paths = []
    # TODO: Need to make this ancestry aware
    if options.verify == 1 and log_entry is not None:  # Changed only
        ui.status("Verifying source revision %s (only-changed)...", source_rev, level=ui.VERBOSE)
        for d in log_entry['changed_paths']:
            path = d['path']
            if not is_child_path(path, source_base):
                continue
            if d['kind'] == "":
                d['kind'] = svnclient.get_kind(source_repos_url, path, source_rev, d['action'], log_entry['changed_paths'])
            assert (d['kind'] == 'file') or (d['kind'] == 'dir')
            path_is_dir = d['kind'] == 'dir'
            path_is_file = d['kind'] == 'file'
            path_offset = path[len(source_base):].strip("/")
            if d['action'] == 'D':
                remove_paths.append(path_offset)
            elif not path_offset in check_paths:
                ui.status("verify_commit: path [mode=changed]: kind=%s: %s", d['kind'], path, level=ui.DEBUG, color='YELLOW')
                if path_is_file:
                    ui.status(" "+"verify_commit [mode=changed]: check_paths.append('%s')", path_offset, level=ui.DEBUG, color='GREEN')
                    check_paths.append(path_offset)
                if path_is_dir:
                    # Only added/replaced directories bring a whole subtree with them
                    if not d['action'] in 'AR':
                        continue
                    child_paths = run_svn(["list", "--recursive", "-r", source_rev, svnclient.safe_path(source_url.rstrip("/")+"/"+path_offset, source_rev)])
                    child_paths = child_paths.strip("\n").split("\n")
                    for child_path in child_paths:
                        if not child_path:
                            continue
                        # Directories have a trailing slash in the "svn list" output
                        child_path_is_dir = child_path[-1] == "/"
                        child_path_offset = child_path.rstrip('/') if child_path_is_dir else child_path
                        if not child_path_is_dir:
                            # Only check files
                            working_path = (path_offset+"/" if path_offset else "") + child_path_offset
                            if not working_path in check_paths:
                                ui.status(" "+"verify_commit [mode=changed]: check_paths.append('%s'+'/'+'%s')", path_offset, child_path_offset, level=ui.DEBUG, color='GREEN')
                                check_paths.append(working_path)
    if options.verify == 2:  # All paths
        ui.status("Verifying source revision %s (all)...", source_rev, level=ui.VERBOSE)
        child_paths = run_svn(["list", "--recursive", "-r", source_rev, svnclient.safe_path(source_url, source_rev)])
        child_paths = child_paths.strip("\n").split("\n")
        for child_path in child_paths:
            if not child_path:
                continue
            # Directories have a trailing slash in the "svn list" output
            child_path_is_dir = child_path[-1] == "/"
            child_path_offset = child_path.rstrip('/') if child_path_is_dir else child_path
            if not child_path_is_dir:
                # Only check files
                ui.status("verify_commit [mode=all]: check_paths.append('%s')", child_path_offset, level=ui.DEBUG, color='GREEN')
                check_paths.append(child_path_offset)

    # If there were any paths deleted in the last revision (options.verify=1 mode),
    # check that they were correctly deleted.
    if remove_paths:
        count_total = len(remove_paths)
        count = 0
        for path_offset in remove_paths:
            count += 1
            if in_svn(path_offset):
                ui.status(" (%s/%s) Verify path: FAIL: %s", str(count).rjust(len(str(count_total))), count_total, path_offset, level=ui.EXTRA, color='RED')
                ui.status("VerificationError: Path removed in source rev r%s, but still exists in target WC: %s", source_rev, path_offset, color='RED')
                error_cnt += 1
            else:
                ui.status(" (%s/%s) Verify remove: OK: %s", str(count).rjust(len(str(count_total))), count_total, path_offset, level=ui.EXTRA)

    # Compare each of the check_path entries between source vs. target
    if check_paths:
        # The first source_rev we replayed into target, i.e. the rev_map key
        # with the lowest target rev. min() raises on an empty mapping, so
        # fall back to 1 explicitly when nothing has been mapped yet.
        source_rev_first = (int(min(rev_map, key=rev_map.get)) if rev_map else 0) or 1
        ui.status("verify_commit: source_rev_first:%s", source_rev_first, level=ui.DEBUG, color='YELLOW')
        count_total = len(check_paths)
        count = 0
        for path_offset in check_paths:
            count += 1
            if count % 500 == 0:
                ui.status("...processed %s (%s of %s)..." % (count, count, count_total), level=ui.VERBOSE)
            ui.status("verify_commit: path_offset:%s", path_offset, level=ui.DEBUG, color='YELLOW')
            source_log_entries = svnclient.run_svn_log(svnclient.safe_path(source_url.rstrip("/")+"/"+path_offset), source_rev, 1, source_rev-source_rev_first+1)
            target_log_entries = svnclient.run_svn_log(svnclient.safe_path(target_url.rstrip("/")+"/"+path_offset), target_rev, 1, target_rev)
            # Build a list of commits in source_log_entries which matches our
            # target path_offset.
            working_path = source_base+"/"+path_offset
            source_revs = []
            for log_entry in source_log_entries:
                source_rev_tmp = log_entry['revision']
                if source_rev_tmp < source_rev_first:
                    # Only process source revisions which have been replayed into target
                    break
                changed_paths_temp = []
                for d in log_entry['changed_paths']:
                    path = d['path']
                    # Match working_path or any parents
                    if is_child_path(working_path, path):
                        ui.status(" verify_commit: changed_path: %s %s@%s (parent:%s)", d['action'], path, source_rev_tmp, working_path, level=ui.DEBUG, color='YELLOW')
                        changed_paths_temp.append({'path': path, 'data': d})
                assert changed_paths_temp
                # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
                changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
                # Find the action for our working_path in this revision. Use a loop to check in reverse order,
                # so that if the target file/folder is "M" but has a parent folder with an "A" copy-from.
                working_path_next = working_path
                match_d = {}
                for v in changed_paths:
                    d = v['data']
                    if not match_d:
                        match_d = d
                    path = d['path']
                    if d['action'] not in _valid_svn_actions:
                        raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                            % (log_entry['revision'], d['action']))
                    if d['action'] in 'AR' and d['copyfrom_revision']:
                        # If we found a copy-from action for a parent path, adjust our
                        # working_path to follow the rename/copy-from, just like find_svn_ancestors().
                        # d['path'] is a leading prefix of working_path (see the
                        # is_child_path() filter above), so swap only that first
                        # occurrence; a later repeat of the same text must be kept.
                        working_path_next = working_path.replace(d['path'], d['copyfrom_path'], 1)
                        match_d = d
                        break
                if is_child_path(working_path, source_base):
                    # Only add source_rev's where the path changed in this revision was a child
                    # of source_base, so that we silently ignore any history that happened on
                    # non-source_base paths (e.g. ignore branch history if we're only replaying trunk).
                    is_diff = False
                    d = match_d
                    if d['action'] == 'M':
                        # For action="M", we need to throw out cases where the only change was to
                        # a property which we ignore, e.g. "svn:mergeinfo".
                        if d['kind'] == "":
                            d['kind'] = svnclient.get_kind(source_repos_url, working_path, log_entry['revision'], d['action'], log_entry['changed_paths'])
                        assert (d['kind'] == 'file') or (d['kind'] == 'dir')
                        if d['kind'] == 'file':
                            # Check for file-content changes
                            # TODO: This should be made ancestor-aware, since the file won't always be at the same path in rev-1
                            sum1 = run_shell_command("svn cat -r %s '%s' | md5sum" % (source_rev_tmp, source_repos_url+working_path+"@"+str(source_rev_tmp)))
                            sum2 = run_shell_command("svn cat -r %s '%s' | md5sum" % (source_rev_tmp-1, source_repos_url+working_path_next+"@"+str(source_rev_tmp-1)))
                            is_diff = sum1 != sum2
                        if not is_diff:
                            # Check for property changes
                            props1 = svnclient.get_all_props(source_repos_url+working_path, source_rev_tmp)
                            props2 = svnclient.get_all_props(source_repos_url+working_path_next, source_rev_tmp-1)
                            # Ignore changes to "svn:mergeinfo", since we don't copy that
                            if 'svn:mergeinfo' in props1: del props1['svn:mergeinfo']
                            if 'svn:mergeinfo' in props2: del props2['svn:mergeinfo']
                            for prop in props1:
                                if prop not in props2 or \
                                        props1[prop] != props2[prop]:
                                    is_diff = True
                                    break
                            for prop in props2:
                                if prop not in props1 or \
                                        props1[prop] != props2[prop]:
                                    is_diff = True
                                    break
                        if not is_diff:
                            ui.status(" verify_commit: skip %s@%s", working_path, source_rev_tmp, level=ui.DEBUG, color='GREEN_B', bold=True)
                    else:
                        # Any non-"M" action always counts as a real change
                        is_diff = True
                    if is_diff:
                        ui.status(" verify_commit: source_revs.append(%s), working_path:%s", source_rev_tmp, working_path, level=ui.DEBUG, color='GREEN_B')
                        source_revs.append({'path': working_path, 'revision': source_rev_tmp})
                working_path = working_path_next
            # Build a list of all the target commits "svn log" returned
            target_revs = []
            target_revs_rmndr = []
            for log_entry in target_log_entries:
                target_rev_tmp = log_entry['revision']
                ui.status(" verify_commit: target_revs.append(%s)", target_rev_tmp, level=ui.DEBUG, color='GREEN_B')
                target_revs.append(target_rev_tmp)
                target_revs_rmndr.append(target_rev_tmp)
            # Compare the two lists
            for d in source_revs:
                working_path = d['path']
                source_rev_tmp = d['revision']
                target_rev_tmp = get_rev_map(source_rev_tmp, " ")
                working_offset = working_path[len(source_base):].strip("/")
                # NOTE(review): these two checksums are not compared anywhere
                # below; the calls are kept so the executed commands are unchanged.
                sum1 = run_shell_command("svn cat -r %s '%s' | md5sum" % (source_rev_tmp, source_repos_url+working_path+"@"+str(source_rev_tmp)))
                sum2 = run_shell_command("svn cat -r %s '%s' | md5sum" % (target_rev_tmp, target_url+"/"+working_offset+"@"+str(target_rev_tmp))) if target_rev_tmp is not None else ""
                ui.status(" verify_commit: %s: source=%s target=%s", working_offset, source_rev_tmp, target_rev_tmp, level=ui.DEBUG, color='GREEN')
                if not target_rev_tmp:
                    ui.status(" (%s/%s) Verify path: FAIL: %s", str(count).rjust(len(str(count_total))), count_total, path_offset, level=ui.EXTRA, color='RED')
                    ui.status("VerificationError: Unable to find corresponding target_rev for source_rev r%s in rev_map (path_offset='%s')", source_rev_tmp, path_offset, color='RED')
                    error_cnt += 1
                    continue
                if target_rev_tmp not in target_revs:
                    # If found a source_rev with no equivalent target_rev in target_revs,
                    # check if the only difference in source_rev vs. source_rev-1 is the
                    # removal/addition of a trailing newline char, since this seems to get
                    # stripped-out sometimes during the replay (via "svn export"?).
                    # Strip any trailing \r\n from file-content (http://stackoverflow.com/a/1656218/346778)
                    sum1 = run_shell_command("svn cat -r %s '%s' | perl -i -p0777we's/\\r\\n\\z//' | md5sum" % (source_rev_tmp, source_repos_url+working_path+"@"+str(source_rev_tmp)))
                    sum2 = run_shell_command("svn cat -r %s '%s' | perl -i -p0777we's/\\r\\n\\z//' | md5sum" % (source_rev_tmp-1, source_repos_url+working_path+"@"+str(source_rev_tmp-1)))
                    if sum1 != sum2:
                        ui.status(" (%s/%s) Verify path: FAIL: %s", str(count).rjust(len(str(count_total))), count_total, path_offset, level=ui.EXTRA, color='RED')
                        ui.status("VerificationError: Found source_rev (r%s) with no corresponding target_rev: path_offset='%s'", source_rev_tmp, path_offset, color='RED')
                        error_cnt += 1
                    # Either way this target_rev is not in the remainder list,
                    # so there is nothing to remove from it.
                    continue
                target_revs_rmndr.remove(target_rev_tmp)
            if target_revs_rmndr:
                rmndr_list = ", ".join(map(str, target_revs_rmndr))
                ui.status(" (%s/%s) Verify path: FAIL: %s", str(count).rjust(len(str(count_total))), count_total, path_offset, level=ui.EXTRA, color='RED')
                ui.status("VerificationError: Found one or more *extra* target_revs: path_offset='%s', target_revs='%s'", path_offset, rmndr_list, color='RED')
                error_cnt += 1
            # NOTE(review): this "OK" line is emitted for every path, including
            # ones that reported FAIL above -- confirm whether that is intended.
            ui.status(" (%s/%s) Verify path: OK: %s", str(count).rjust(len(str(count_total))), count_total, path_offset, level=ui.EXTRA)

    # Ensure there are no "extra" files in the target side
    # NOTE(review): nesting level of this section was reconstructed; it is
    # assumed to run regardless of whether check_paths is empty -- confirm.
    if options.verify == 2:
        target_paths = []
        child_paths = run_svn(["list", "--recursive", "-r", target_rev, svnclient.safe_path(target_url, target_rev)])
        child_paths = child_paths.strip("\n").split("\n")
        for child_path in child_paths:
            if not child_path:
                continue
            # Directories have a trailing slash in the "svn list" output
            child_path_is_dir = child_path[-1] == "/"
            child_path_offset = child_path.rstrip('/') if child_path_is_dir else child_path
            if not child_path_is_dir:
                target_paths.append(child_path_offset)
        # Compare
        for path_offset in target_paths:
            if not path_offset in check_paths:
                ui.status("VerificationError: Path exists in target (@%s) but not source (@%s): %s", target_rev, source_rev, path_offset, color='RED')
                error_cnt += 1
        for path_offset in check_paths:
            if not path_offset in target_paths:
                ui.status("VerificationError: Path exists in source (@%s) but not target (@%s): %s", source_rev, target_rev, path_offset, color='RED')
                error_cnt += 1

    if error_cnt > 0:
        raise VerificationError("Found %s verification errors" % (error_cnt))
    ui.status("Verified revision %s (%s).", target_rev, "all" if options.verify == 2 else "only-changed")
352
def full_svn_revert():
    """
    Do an "svn revert" and proactively remove any extra files in the working copy.

    Runs "svn revert --recursive ." and then "svn status" in the current
    directory; every path that status reports with a leading "?" is deleted
    from disk (files via os.remove, directories via shell.rmtree).
    """
    run_svn(["revert", "--recursive", "."])
    output = run_svn(["status"])
    if not output:
        return
    for line in output.strip("\n").split("\n"):
        # Use startswith() rather than line[0]: the status output can contain
        # blank lines (e.g. the separator printed before a changelist section),
        # and indexing an empty string would raise IndexError.
        if not line.startswith("?"):
            continue
        path = line[4:].strip(" ")
        if os.path.isfile(path):
            os.remove(path)
        if os.path.isdir(path):
            shell.rmtree(path)
368
def gen_tracking_revprops(source_rev):
    """
    Return the list of svn2svn source-tracking revprops for 'source_rev'.

    Each entry is a {'name': ..., 'value': ...} dict. The values come from the
    module-level source_repos_uuid and source_url (URL-quoted, keeping ':' and
    '/' as-is) plus the given source_rev.
    """
    tracking = (
        ('svn2svn:source_uuid', source_repos_uuid),
        ('svn2svn:source_url', urllib.quote(source_url, ":/")),
        ('svn2svn:source_rev', source_rev),
    )
    return [{'name': prop_name, 'value': prop_value} for prop_name, prop_value in tracking]
377
def sync_svn_props(source_url, source_rev, path_offset):
    """
    Make the properties on the target WC path match those on the source path.

    Reads all properties of source_url/path_offset at source_rev and of the
    local path_offset, then issues "svn propdel" / "svn propset" commands on
    path_offset so the two agree. "svn:mergeinfo" from the source is never
    carried forward.
    """
    wanted = svnclient.get_all_props(join_path(source_url, path_offset), source_rev)
    current = svnclient.get_all_props(path_offset)
    # Never carry-forward "svn:mergeinfo"
    wanted.pop('svn:mergeinfo', None)
    # First pass: drop anything the target has which the source does not
    for name in current:
        if name in wanted:
            continue
        run_svn(["propdel", name, svnclient.safe_path(path_offset)])
    # Second pass: add what is missing and refresh what differs
    for name in wanted:
        is_missing = name not in current
        if is_missing or wanted[name] != current[name]:
            run_svn(["propset", name, wanted[name], svnclient.safe_path(path_offset)])
398
def in_svn(p, require_in_repo=False, prefix=""):
    """
    Report whether the file/folder 'p' is tracked by Subversion.

    The answer is derived from the first entry of a non-recursive
    svnclient.get_svn_status() call on 'p'; no entries means "not tracked".
    (Looking for ".svn" directories is not reliable: with SVN 1.7+ WC-NG there
    is only a single top-level ".svn" at the root of the working-copy.)

    'require_in_repo', when True, makes paths that are only locally-added (or
      have no revision) count as not tracked.
    'prefix' is prepended to the debug status line.
    """
    entries = svnclient.get_svn_status(p, no_recursive=True)
    if not entries:
        return False
    entry = entries[0]
    if require_in_repo and (entry['status'] == 'added' or entry['revision'] is None):
        # Caller needs the path to exist in the repo itself, so a path which
        # is only locally-added does not qualify.
        tracked = False
    elif entry['status'] == 'deleted':
        # Paths scheduled for deletion in the WC are not considered tracked.
        tracked = False
    else:
        # Normal entries, plus locally added/copied ones, count as tracked.
        tracked = bool(entry['type'] == 'normal' or entry['status'] == 'added' or entry['copied'] == 'true')
    ui.status(prefix + ">> in_svn('%s', require_in_repo=%s) --> %s", p, str(require_in_repo), str(tracked), level=ui.DEBUG, color='GREEN')
    return tracked
420
def is_child_path(path, p_path):
    """
    Return True if 'path' is the same as 'p_path' or lies underneath it
    (i.e. starts with p_path followed by a '/'), else False.
    """
    if path == p_path:
        return True
    return path.startswith(p_path + "/")
423
def join_path(base, child):
    """
    Join 'base' and 'child' with exactly one '/' between them.

    Any trailing '/' on 'base' is removed first. If 'child' is empty, the
    (stripped) 'base' is returned on its own.
    """
    # str.rstrip() returns a new string; the result must be kept.
    base = base.rstrip('/')
    return base+"/"+child if child else base
427
def find_svn_ancestors(svn_repos_url, start_path, start_rev, stop_base_path=None, prefix=""):
    """
    Given an initial starting path+rev, walk the SVN history backwards to inspect the
    ancestry of that path, optionally seeing if it traces back to stop_base_path.

    Build an array of copyfrom_path and copyfrom_revision pairs for each of the "svn copy"'s.
    If we find a copyfrom_path which stop_base_path is a substring match of (e.g. we crawled
    back to the initial branch-copy from trunk), then return the collection of ancestor
    paths. Otherwise, copyfrom_path has no ancestry compared to stop_base_path.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'svn_repos_url' is the full URL to the root of the SVN repository,
      e.g. 'file:///path/to/repo'
    'start_path' is the path in the SVN repo to the source path to start checking
      ancestry at, e.g. '/branches/fix1/projectA/file1.txt'.
    'start_rev' is the revision to start walking the history of start_path backwards from.
    'stop_base_path' is the path in the SVN repo to stop tracing ancestry once we've reached,
      i.e. the target path we're trying to trace ancestry back to, e.g. '/trunk'.
    'prefix' is prepended to every debug status line.

    Returns a list of dicts with keys 'path', 'revision', 'copyfrom_path' and
    'copyfrom_rev', in the order the copies were discovered (newest first).
    The list is empty when no copy-from was found, or when stop_base_path was
    given and the chain ended (delete / plain add) before reaching it.
    Raises UnsupportedSVNAction for an action code outside _valid_svn_actions.
    """
    ui.status(prefix + ">> find_svn_ancestors: Start: (%s) start_path: %s stop_base_path: %s",
        svn_repos_url, start_path+"@"+str(start_rev), stop_base_path, level=ui.DEBUG, color='YELLOW')
    done = False
    no_ancestry = False
    cur_path = start_path
    cur_rev = start_rev
    first_iter_done = False
    ancestors = []
    while not done:
        # Get the first "svn log" entry for cur_path (relative to @cur_rev)
        ui.status(prefix + ">> find_svn_ancestors: %s", svn_repos_url+cur_path+"@"+str(cur_rev), level=ui.DEBUG, color='YELLOW')
        log_entry = svnclient.get_first_svn_log_entry(svn_repos_url+cur_path, 1, cur_rev)
        if not log_entry:
            ui.status(prefix + ">> find_svn_ancestors: Done: no log_entry", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        # If we found a copy-from case which matches our stop_base_path, we're done.
        # ...but only if we've at least tried to search for the first copy-from path.
        if stop_base_path is not None and first_iter_done and is_child_path(cur_path, stop_base_path):
            ui.status(prefix + ">> find_svn_ancestors: Done: Found is_child_path(cur_path, stop_base_path) and first_iter_done=True", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        first_iter_done = True
        # Search for any actions on our target path (or parent paths).
        changed_paths_temp = []
        for d in log_entry['changed_paths']:
            path = d['path']
            if is_child_path(cur_path, path):
                changed_paths_temp.append({'path': path, 'data': d})
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the ancestry-chain.
            ui.status(prefix + ">> find_svn_ancestors: Done: No matching changed_paths", level=ui.DEBUG, color='YELLOW')
            done = True
            continue
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
        # Find the action for our cur_path in this revision. Use a loop to check in reverse order,
        # so that if the target file/folder is "M" but has a parent folder with an "A" copy-from
        # then we still correctly match the deepest copy-from.
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            action = d['action']
            if action not in _valid_svn_actions:
                raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                    % (log_entry['revision'], action))
            ui.status(prefix + "> %s %s%s", action, path,
                (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                level=ui.DEBUG, color='YELLOW')
            if action == 'D':
                # If file/folder was deleted, ancestry-chain stops here
                if stop_base_path:
                    no_ancestry = True
                ui.status(prefix + ">> find_svn_ancestors: Done: deleted", level=ui.DEBUG, color='YELLOW')
                done = True
                break
            if action in 'RA':
                # If file/folder was added/replaced but not a copy, ancestry-chain stops here
                if not d['copyfrom_path']:
                    if stop_base_path:
                        no_ancestry = True
                    ui.status(prefix + ">> find_svn_ancestors: Done: %s with no copyfrom_path",
                        "Added" if action == "A" else "Replaced",
                        level=ui.DEBUG, color='YELLOW')
                    done = True
                    break
                # Else, file/folder was added/replaced and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                ui.status(prefix + ">> find_svn_ancestors: Found copy-from (action=%s): %s --> %s",
                    action, path, d['copyfrom_path']+"@"+str(d['copyfrom_revision']),
                    level=ui.DEBUG, color='YELLOW')
                # d['path'] is a leading prefix of cur_path (see the is_child_path()
                # filter above), so swap only that first occurrence; a later repeat
                # of the same text deeper in cur_path must be left untouched.
                copied_from = cur_path.replace(d['path'], d['copyfrom_path'], 1)
                ancestors.append({'path': cur_path, 'revision': log_entry['revision'],
                    'copyfrom_path': copied_from, 'copyfrom_rev': d['copyfrom_revision']})
                cur_path = copied_from
                cur_rev = d['copyfrom_revision']
                # Follow the copy and keep on searching
                break
    if stop_base_path and no_ancestry:
        # If we're tracing back ancestry to a specific target stop_base_path and
        # the ancestry-chain stopped before we reached stop_base_path, then return
        # nothing since there is no ancestry chaining back to that target.
        ancestors = []
    if ancestors:
        if ui.get_level() >= ui.DEBUG:
            # Width of the widest "path@rev" label, so the arrows line up
            max_len = 0
            for d in ancestors:
                max_len = max(max_len, len(d['path']+"@"+str(d['revision'])))
            ui.status(prefix + ">> find_svn_ancestors: Found parent ancestors:", level=ui.DEBUG, color='YELLOW_B')
            for idx, d in enumerate(ancestors):
                ui.status(prefix + " [%s] %s --> %s", idx,
                    str(d['path']+"@"+str(d['revision'])).ljust(max_len),
                    str(d['copyfrom_path']+"@"+str(d['copyfrom_rev'])),
                    level=ui.DEBUG, color='YELLOW')
    else:
        ui.status(prefix + ">> find_svn_ancestors: No ancestor-chain found: %s",
            svn_repos_url+start_path+"@"+str(start_rev), level=ui.DEBUG, color='YELLOW')
    return ancestors
549
def get_rev_map(source_rev, prefix):
    """
    Translate a source-repo revision number into its target-repo equivalent.

    Walks downward from source_rev to 1 and returns the rev_map value (as an
    int) for the first revision found in rev_map, i.e. the highest mapped
    entry less-than-or-equal-to source_rev. Returns None if there is none.
    'prefix' is prepended to the debug status lines.
    """
    ui.status(prefix + ">> get_rev_map(%s)", source_rev, level=ui.DEBUG, color='GREEN')
    candidate = int(source_rev)
    while candidate > 0:
        found = candidate in rev_map
        ui.status(prefix + ">> get_rev_map: rev=%s in_rev_map=%s", candidate, str(found), level=ui.DEBUG, color='BLACK_B')
        if found:
            return int(rev_map[candidate])
        candidate -= 1
    # We fell off the bottom of the rev_map without finding a match.
    return None
563
def set_rev_map(source_rev, target_rev):
    """
    Record in the module-level rev_map that 'source_rev' (source repo) was
    replayed as 'target_rev' (target repo). Both are stored as ints.
    """
    mapped_to = int(target_rev)
    mapped_from = int(source_rev)
    rev_map[mapped_from] = mapped_to
568
def build_rev_map(target_url, target_end_rev, source_info):
    """
    Check for any already-replayed history from source_url (source_info) and
    build the mapping-table of source_rev -> target_rev.

    'target_url' is the target path whose log (revisions 1..target_end_rev) is scanned.
    'target_end_rev' is the last target revision to scan.
    'source_info' is a dict; its 'repos_uuid' and 'url' values are compared
      against the 'svn2svn:source_uuid' / 'svn2svn:source_url' revprops of
      each target revision to decide whether that revision was replayed from
      this source.

    Resets the module-level rev_map and repopulates it via set_rev_map().
    """
    global rev_map
    rev_map = {}
    ui.status("Rebuilding target_rev -> source_rev rev_map...", level=ui.VERBOSE)
    proc_count = 0
    # These are the same for every log entry, so compute them once up front
    expected_uuid = source_info['repos_uuid']
    expected_url = urllib.quote(source_info['url'], ":/")
    it_log_entries = svnclient.iter_svn_log_entries(target_url, 1, target_end_rev, get_changed_paths=False, get_revprops=True)
    for log_entry in it_log_entries:
        if log_entry['revprops']:
            revprops = {}
            for v in log_entry['revprops']:
                if v['name'].startswith('svn2svn:'):
                    revprops[v['name']] = v['value']
            # Use .get() so that a revision carrying only some of the svn2svn:*
            # revprops is skipped rather than raising KeyError.
            if revprops and \
               revprops.get('svn2svn:source_uuid') == expected_uuid and \
               revprops.get('svn2svn:source_url') == expected_url and \
               'svn2svn:source_rev' in revprops:
                set_rev_map(revprops['svn2svn:source_rev'], log_entry['revision'])
        proc_count += 1
        if proc_count % 500 == 0:
            # Report the revision currently being processed. (A variable that is
            # only assigned on a match could still be unbound at this point.)
            ui.status("...processed %s (%s of %s)..." % (proc_count, log_entry['revision'], target_end_rev), level=ui.VERBOSE)
594
def get_svn_dirlist(svn_path, rev_number=None):
    """
    Return the lines printed by "svn list" for the given folder path, as a list.

    'svn_path' is the path/URL to list.
    'rev_number', when not None, is passed to "svn list" via "-r".

    The command is run with no_fail=True; output of length 1 or less yields
    an empty list.
    """
    rev_args = [] if rev_number is None else ["-r", rev_number]
    listing = run_svn(["list"] + rev_args + [svnclient.safe_path(svn_path, rev_number)], no_fail=True)
    if len(listing) > 1:
        return listing.strip("\n").split("\n")
    return []
606
def path_in_list(paths, path):
    """
    Return True if 'path' equals, or is a child of, any entry in 'paths'.
    """
    return any(is_child_path(path, parent) for parent in paths)
612
def add_path(paths, path):
    """
    Append 'path' to 'paths' unless it (or one of its parents) is already listed.
    """
    if path_in_list(paths, path):
        return
    paths.append(path)
616
def in_ancestors(ancestors, ancestor):
    """
    Check 'ancestor' against the 'ancestors' chain.

    Scans 'ancestors' from the last entry back to (but not including) the
    first one. For the first entry met whose revision is greater than
    ancestor['revision'], the result is whether ancestor['path'] equals or is
    a child of that entry's path. If no such entry exists, returns True.
    """
    # NOTE(review): index 0 is never inspected, exactly as in the original
    # loop bounds -- confirm this is intentional.
    for candidate in reversed(ancestors[1:]):
        if int(candidate['revision']) > ancestor['revision']:
            return is_child_path(ancestor['path'], candidate['path'])
    return True
624
def do_svn_add(source_url, path_offset, source_rev, source_ancestors,
               parent_copyfrom_path="", parent_copyfrom_rev="",
               export_paths=None, is_dir=False, skip_paths=None, prefix=""):
    """
    Given the add'd source path, replay the "svn add/copy" commands to correctly
    track renames across copy-from's.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2 " Rename folder
    4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt " Rename file inside renamed folder
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'source_url' is the source URL being replayed; source_base is derived from it
      by stripping the source_repos_url prefix.
    'path_offset' is the offset from source_base to the file to check ancestry for,
      e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'source_ancestors' is the ancestor chain that candidate copy-from ancestors are
      checked against via in_ancestors().
    'parent_copyfrom_path' and 'parent_copyfrom_rev' is the copy-from path of the parent
      directory, when being called recursively by do_svn_add_dir().
    'export_paths' is the list of path_offset's that we've deferred running "svn export" on.
      It is appended to in place. A fresh list is used when omitted.
    'is_dir' is whether path_offset is a directory (rather than a file).
    'skip_paths' is a list of path_offset's which do_svn_add_dir() skips when
      recursing into a directory. A fresh list is used when omitted.
    'prefix' is prepended to the debug status messages.
    """
    # Use None sentinels rather than mutable default values: a default list/dict
    # is created once and shared between calls, and export_paths must be a list
    # because add_path() calls .append() on it.
    if export_paths is None:
        export_paths = []
    if skip_paths is None:
        skip_paths = []
    source_base = source_url[len(source_repos_url):]  # e.g. '/trunk'
    ui.status(prefix + ">> do_svn_add: %s %s", join_path(source_base, path_offset)+"@"+str(source_rev),
        " (parent-copyfrom: "+parent_copyfrom_path+"@"+str(parent_copyfrom_rev)+")" if parent_copyfrom_path else "",
        level=ui.DEBUG, color='GREEN')
    # Check if the given path has ancestors which chain back to the current source_base
    ancestors = find_svn_ancestors(source_repos_url, join_path(source_base, path_offset), source_rev, stop_base_path=source_base, prefix=prefix+" ")
    # Choose the eldest ancestor, i.e. where we reached stop_base_path=source_base
    ancestor = ancestors[-1] if ancestors else None
    if ancestor and not in_ancestors(source_ancestors, ancestor):
        ancestor = None
    copyfrom_path = ancestor['copyfrom_path'] if ancestor else ""
    copyfrom_rev = ancestor['copyfrom_rev'] if ancestor else ""
    tgt_rev = None
    if ancestor:
        # The copy-from path has ancestry back to source_url.
        ui.status(prefix + ">> do_svn_add: Check copy-from: Found parent: %s", copyfrom_path+"@"+str(copyfrom_rev),
            level=ui.DEBUG, color='GREEN', bold=True)
        # Map the copyfrom_rev (source repo) to the equivalent target repo rev #. This can
        # return None in the case where copyfrom_rev is *before* our source_start_rev.
        tgt_rev = get_rev_map(copyfrom_rev, prefix+" ")
        ui.status(prefix + ">> do_svn_add: get_rev_map: %s (source) -> %s (target)", copyfrom_rev, tgt_rev, level=ui.DEBUG, color='GREEN')
    else:
        ui.status(prefix + ">> do_svn_add: Check copy-from: No ancestor chain found.", level=ui.DEBUG, color='GREEN')
    if ancestor and tgt_rev:
        # Check if this path_offset in the target WC already has this ancestry, in which
        # case there's no need to run the "svn copy" (again).
        path_in_svn = in_svn(path_offset, prefix=prefix+" ")
        log_entry = svnclient.get_last_svn_log_entry(path_offset, 1, 'HEAD', get_changed_paths=False) if in_svn(path_offset, require_in_repo=True, prefix=prefix+" ") else []
        if (not log_entry or (log_entry['revision'] != tgt_rev)):
            copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
            ui.status(prefix + ">> do_svn_add: svn_copy: Copy-from: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " copyfrom: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " p_copyfrom: %s", parent_copyfrom_path+"@"+str(parent_copyfrom_rev) if parent_copyfrom_path else "", level=ui.DEBUG, color='GREEN')
            if path_in_svn and \
               ((parent_copyfrom_path and is_child_path(copyfrom_path, parent_copyfrom_path)) and \
                (parent_copyfrom_rev and copyfrom_rev == parent_copyfrom_rev)):
                # When being called recursively, if this child entry has the same ancestor as
                # the parent, then no need to try to run another "svn copy".
                ui.status(prefix + ">> do_svn_add: svn_copy: Same ancestry as parent: %s",
                    parent_copyfrom_path+"@"+str(parent_copyfrom_rev),level=ui.DEBUG, color='GREEN')
            else:
                # Copy this path from the equivalent path+rev in the target repo, to create the
                # equivalent history.
                if parent_copyfrom_path:
                    # If we have a parent copy-from path, we mis-match that so display a status
                    # message describing the action we're mimic'ing. If path_in_svn, then this
                    # is logically a "replace" rather than an "add".
                    ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), join_path(source_base, path_offset), ancestors[0]['copyfrom_path']+"@"+str(copyfrom_rev), level=ui.VERBOSE)
                if path_in_svn:
                    # If local file is already under version-control, then this is a replace.
                    ui.status(prefix + ">> do_svn_add: pre-copy: local path already exists: %s", path_offset, level=ui.DEBUG, color='GREEN')
                    run_svn(["update", svnclient.safe_path(path_offset)])
                    run_svn(["remove", "--force", svnclient.safe_path(path_offset)])
                run_svn(["copy", "-r", tgt_rev, svnclient.safe_path(join_path(target_url, copyfrom_offset), tgt_rev), svnclient.safe_path(path_offset)])
                if is_dir:
                    # Export the final version of all files in this folder.
                    add_path(export_paths, path_offset)
                else:
                    # Export the final version of this file.
                    run_svn(["export", "--force", "-r", source_rev,
                             svnclient.safe_path(source_repos_url+join_path(source_base, path_offset), source_rev), svnclient.safe_path(path_offset)])
                if options.keep_prop:
                    sync_svn_props(source_url, source_rev, path_offset)
        else:
            ui.status(prefix + ">> do_svn_add: Skipped 'svn copy': %s", path_offset, level=ui.DEBUG, color='GREEN')
    else:
        # Else, either this copy-from path has no ancestry back to source_url OR copyfrom_rev comes
        # before our initial source_start_rev (i.e. tgt_rev == None), so can't do a "svn copy".
        # Create (parent) directory if needed.
        # TODO: This is (nearly) a duplicate of code in process_svn_log_entry(). Should this be
        # split-out to a shared tag?
        p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or None
        if p_path and not os.path.exists(p_path):
            run_svn(["mkdir", svnclient.safe_path(p_path)])
        if not in_svn(path_offset, prefix=prefix+" "):
            if is_dir:
                # Export the final version of all files in this folder.
                add_path(export_paths, path_offset)
            else:
                # Export the final version of this file. We *need* to do this before running
                # the "svn add", even if we end-up re-exporting this file again via export_paths.
                run_svn(["export", "--force", "-r", source_rev,
                         svnclient.safe_path(source_repos_url+join_path(source_base, path_offset), source_rev), svnclient.safe_path(path_offset)])
            # If not already under version-control, then "svn add" this file/folder.
            run_svn(["add", "--parents", svnclient.safe_path(path_offset)])
        if options.keep_prop:
            sync_svn_props(source_url, source_rev, path_offset)
    if is_dir:
        # For any folders that we process, process any child contents, so that we correctly
        # replay copies/replaces/etc.
        do_svn_add_dir(source_url, path_offset, source_rev, source_ancestors,
                       copyfrom_path, copyfrom_rev, export_paths, skip_paths, prefix+" ")
746
def do_svn_add_dir(source_url, path_offset, source_rev, source_ancestors,
                   parent_copyfrom_path, parent_copyfrom_rev,
                   export_paths, skip_paths, prefix=""):
    """
    Reconcile the contents of the directory 'path_offset' between the local
    working-copy and the source repo at 'source_rev'.

    Every entry listed for the source directory is handed to do_svn_add(),
    unless its working path is in 'skip_paths'. Every entry listed locally
    but not in the source directory is "svn update"'d and "svn remove"'d.
    The remaining arguments are passed through to do_svn_add() unchanged.
    """
    source_base = source_url[len(source_repos_url):]  # e.g. '/trunk'
    # Fetch both directory listings: local WC (target_url) vs. remote repo (source_url).
    # TODO: the local listing won't include add'd paths because "svn ls" lists the
    # contents of the associated remote repo folder. (Is this a problem?)
    local_entries = get_svn_dirlist(path_offset)
    remote_entries = get_svn_dirlist(join_path(source_url, path_offset), source_rev)
    ui.status(prefix + ">> do_svn_add_dir: paths_local: %s", str(local_entries), level=ui.DEBUG, color='GREEN')
    ui.status(prefix + ">> do_svn_add_dir: paths_remote: %s", str(remote_entries), level=ui.DEBUG, color='GREEN')

    # Pass 1: add/update everything that exists remotely.
    for entry in remote_entries:
        # A trailing slash marks a directory entry.
        entry_is_dir = (entry[-1] == "/")
        if entry_is_dir:
            entry_name = entry.rstrip('/')
        else:
            entry_name = entry
        working_path = join_path(path_offset, entry_name).lstrip('/')
        if working_path in skip_paths:
            continue
        do_svn_add(source_url, working_path, source_rev, source_ancestors,
                   parent_copyfrom_path, parent_copyfrom_rev,
                   export_paths, entry_is_dir, skip_paths, prefix+" ")

    # Pass 2: remove whatever exists locally but not remotely.
    for entry in local_entries:
        if entry in remote_entries:
            continue
        if entry[-1] == "/":
            entry_name = entry.rstrip('/')
        else:
            entry_name = entry
        working_path = join_path(path_offset, entry_name).lstrip('/')
        ui.status(" %s %s", 'D', join_path(source_base, working_path), level=ui.VERBOSE)
        run_svn(["update", svnclient.safe_path(working_path)])
        run_svn(["remove", "--force", svnclient.safe_path(working_path)])
        # TODO: Does this handle deleted folders too? Wouldn't want to have a case
        # where we only delete all files from folder but leave orphaned folder around.
777
def process_svn_log_entry(log_entry, ancestors, commit_paths, prefix = ""):
    """
    Process SVN changes from the given log entry. Build an array (commit_paths)
    of the paths in the working-copy that were changed, i.e. the paths which
    we'll pass to "svn commit".

    'log_entry' is a dict; this function reads its 'revision', 'url' and
      'changed_paths' keys. Each changed-path dict is read for 'path', 'kind',
      'action', 'copyfrom_path' and 'copyfrom_revision', and its 'kind' value
      is filled-in in place when it is empty or 'none'.
    'ancestors' is passed through to do_svn_add() for copy-from'd adds.
    'commit_paths' is a list which is appended to in place with the path
      offset of every changed path under source_base.
    'prefix' is prepended to the debug status messages.

    Raises UnsupportedSVNAction for an action not in _valid_svn_actions, and
    InternalError for an action that no branch below handles.
    """
    export_paths = []
    source_rev = log_entry['revision']
    source_url = log_entry['url']
    source_base = source_url[len(source_repos_url):] # e.g. '/trunk'
    ui.status(prefix + ">> process_svn_log_entry: %s", source_url+"@"+str(source_rev), level=ui.DEBUG, color='GREEN')
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not is_child_path(path, source_base):
            # Ignore changed files that are not part of this subdir
            ui.status(prefix + ">> process_svn_log_entry: Unrelated path: %s (base: %s)", path, source_base, level=ui.DEBUG, color='GREEN')
            continue
        if d['kind'] == "" or d['kind'] == 'none':
            # The "kind" value was introduced in SVN 1.6, and "svn log --xml" won't return a "kind"
            # value for commits made on a pre-1.6 repo, even if the server is now running 1.6.
            # We need to use other methods to fetch the node-kind for these cases.
            d['kind'] = svnclient.get_kind(source_repos_url, path, source_rev, d['action'], log_entry['changed_paths'])
        assert (d['kind'] == 'file') or (d['kind'] == 'dir')
        path_is_dir = True if d['kind'] == 'dir' else False
        path_is_file = True if d['kind'] == 'file' else False
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path
        action = d['action']
        if action not in _valid_svn_actions:
            raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                % (source_rev, action))
        ui.status(" %s %s%s", action, d['path'],
            (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
            level=ui.VERBOSE)

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        commit_paths.append(path_offset)

        # Special-handling for replace's
        if action == 'R':
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            if path_offset and in_svn(path_offset):
                # Target path might not be under version-control yet, e.g. parent "add"
                # was a copy-from a branch which had no ancestry back to trunk, and each
                # child folder under that parent folder is a "replace" action on the final
                # merge to trunk. Since the child folders will be in skip_paths, do_svn_add
                # wouldn't have created them while processing the parent "add" path.
                if path_is_dir:
                    # Need to "svn update" before "svn remove" in case child contents are at
                    # a higher rev than the (parent) path_offset.
                    run_svn(["update", svnclient.safe_path(path_offset)])
                run_svn(["remove", "--force", svnclient.safe_path(path_offset)])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            # Determine where to export from.
            # NOTE(review): svn_copy, copyfrom_path and copyfrom_rev below are
            # assigned but not read again within this function.
            svn_copy = False
            # Handle cases where this "add" was a copy from another URL in the source repo
            if d['copyfrom_revision']:
                copyfrom_path = d['copyfrom_path']
                copyfrom_rev = d['copyfrom_revision']
                skip_paths = []
                for tmp_d in log_entry['changed_paths']:
                    tmp_path = tmp_d['path']
                    if is_child_path(tmp_path, path) and tmp_d['action'] in 'ARD':
                        # Build list of child entries which are also in the changed_paths list,
                        # so that do_svn_add() can skip processing these entries when recursing
                        # since we'll end-up processing them later. Don't include action="M" paths
                        # in this list because it's non-conclusive: it could just mean that the
                        # file was modified *after* the copy-from, so we still want do_svn_add()
                        # to re-create the correct ancestry.
                        tmp_path_offset = tmp_path[len(source_base):].strip("/")
                        skip_paths.append(tmp_path_offset)
                do_svn_add(source_url, path_offset, source_rev, ancestors, "", "", export_paths, path_is_dir, skip_paths, prefix+" ")
            # Else just "svn export" the files from the source repo and "svn add" them.
            else:
                # Create (parent) directory if needed
                p_path = path_offset if path_is_dir else os.path.dirname(path_offset).strip() or None
                if p_path and not os.path.exists(p_path):
                    run_svn(["mkdir", svnclient.safe_path(p_path)])
                # Export the entire added tree.
                if path_is_dir:
                    # For directories, defer the (recursive) "svn export". Might have a
                    # situation in a branch merge where the entry in the svn-log is a
                    # non-copy-from'd "add" but there are child contents (that we haven't
                    # gotten to yet in log_entry) that are copy-from's. When we try to do
                    # the "svn copy" later on in do_svn_add() for those copy-from'd paths,
                    # having pre-existing (svn-add'd) contents creates some trouble.
                    # Instead, just create the stub folders ("svn mkdir" above) and defer
                    # exporting the final file-state until the end.
                    add_path(export_paths, path_offset)
                else:
                    # Export the final version of this file. We *need* to do this before running
                    # the "svn add", even if we end-up re-exporting this file again via export_paths.
                    run_svn(["export", "--force", "-r", source_rev,
                             svnclient.safe_path(join_path(source_url, path_offset), source_rev), svnclient.safe_path(path_offset)])
                if not in_svn(path_offset, prefix=prefix+" "):
                    # Need to use in_svn here to handle cases where client committed the parent
                    # folder and each indiv sub-folder.
                    run_svn(["add", "--parents", svnclient.safe_path(path_offset)])
                if options.keep_prop:
                    sync_svn_props(source_url, source_rev, path_offset)

        elif action == 'D':
            if path_is_dir:
                # For dirs, need to "svn update" before "svn remove" because the final
                # "svn commit" will fail if the parent (path_offset) is at a lower rev
                # than any of the child contents. This needs to be a recursive update.
                run_svn(["update", svnclient.safe_path(path_offset)])
            run_svn(["remove", "--force", svnclient.safe_path(path_offset)])

        elif action == 'M':
            if path_is_file:
                run_svn(["export", "--force", "-N" , "-r", source_rev,
                         svnclient.safe_path(join_path(source_url, path_offset), source_rev), svnclient.safe_path(path_offset)])
            if path_is_dir:
                # For dirs, need to "svn update" before export/prop-sync because the
                # final "svn commit" will fail if the parent is at a lower rev than
                # child contents. Just need to update the rev-state of the dir (d['path']),
                # don't need to recursively update all child contents.
                # (??? is this the right reason?)
                run_svn(["update", "-N", svnclient.safe_path(path_offset)])
            if options.keep_prop:
                sync_svn_props(source_url, source_rev, path_offset)

        else:
            raise InternalError("Internal Error: process_svn_log_entry: Unhandled 'action' value: '%s'"
                % action)

    # Export the final version of all add'd paths from source_url
    if export_paths:
        for path_offset in export_paths:
            run_svn(["export", "--force", "-r", source_rev,
                     svnclient.safe_path(join_path(source_url, path_offset), source_rev), svnclient.safe_path(path_offset)])
924
def keep_revnum(source_rev, target_rev_last, wc_target_tmp):
    """
    Add "padding" target revisions as needed to keep source and target
    revision #'s identical.

    'source_rev' is the source revision # about to be replayed.
    'target_rev_last' is the last revision # currently in the target repo.
    'wc_target_tmp' is the path of a scratch working-copy. It is deleted and
      re-created here whenever padding commits are needed, and deleted again
      afterwards.

    Returns the (possibly advanced) last target revision #.
    Raises InternalError if source_rev is not greater than target_rev_last,
    and KeyboardInterrupt if a Ctrl-C was trapped during a padding commit.
    """
    bh = BreakHandler()
    if int(source_rev) <= int(target_rev_last):
        raise InternalError("keep-revnum mode is enabled, "
            "but source revision (r%s) is less-than-or-equal last target revision (r%s)" % \
            (source_rev, target_rev_last))
    if int(target_rev_last) < int(source_rev)-1:
        # Add "padding" target revisions to keep source and target rev #'s identical
        if os.path.exists(wc_target_tmp):
            shell.rmtree(wc_target_tmp)
        run_svn(["checkout", "-r", "HEAD", "--depth=empty", svnclient.safe_path(target_repos_url, "HEAD"), svnclient.safe_path(wc_target_tmp)])
        for rev_num in range(int(target_rev_last)+1, int(source_rev)):
            run_svn(["propset", "svn2svn:keep-revnum", rev_num, svnclient.safe_path(wc_target_tmp)])
            # Prevent Ctrl-C's during this inner part, so we'll always display
            # the "Commit revision ..." message if we ran a "svn commit".
            bh.enable()
            try:
                output = run_svn(["commit", "-m", "", svnclient.safe_path(wc_target_tmp)])
                rev_num_tmp = parse_svn_commit_rev(output) if output else None
                assert rev_num == rev_num_tmp
                ui.status("Committed revision %s (keep-revnum).", rev_num)
            finally:
                # Always release the break handler, even if the commit or the
                # sanity check above raised, so Ctrl-C is not left disabled.
                bh.disable()
            # Check if the user tried to press Ctrl-C
            if bh.trapped:
                raise KeyboardInterrupt
            target_rev_last = rev_num
        shell.rmtree(wc_target_tmp)
    return target_rev_last
956
def disp_svn_log_summary(log_entry):
    """
    Emit (at VERBOSE level) a separator line, a "rREV | author | date"
    header line and the commit message of the given log entry.
    """
    ui.status("------------------------------------------------------------------------", level=ui.VERBOSE)
    # log_entry['date'] is converted with int() and treated as a timestamp.
    header_fields = (
        log_entry['revision'],
        log_entry['author'],
        str(datetime.fromtimestamp(int(log_entry['date'])).isoformat(' ')),
    )
    ui.status("r%s | %s | %s", *header_fields, level=ui.VERBOSE)
    ui.status(log_entry['message'], level=ui.VERBOSE)
964
def real_main(args):
    """
    Replay history from the source URL into the target URL.

    'args' is the list of positional command-line arguments; the first two
    items (source_url, then target_url) are popped off it. The parsed
    command-line switches are read from the module-level 'options'.

    Initializes the module-level source_*/target_* variables, prepares the
    '_wc_target' working-copy (and chdir's into it), performs either the
    initial import or a continue-mode rev_map rebuild, and then replays each
    remaining source log entry as a commit to the target.
    """
    global source_url, target_url, rev_map
    global source_repos_url, source_base, source_repos_uuid
    global target_repos_url, target_base
    # Use urllib.unquote() to URL-decode source_url/target_url values.
    # All URLs passed to run_svn() should go through svnclient.safe_path()
    # and we don't want to end-up *double* urllib.quote'ing if the user-
    # supplied source/target URL's are already URL-encoded.
    source_url = urllib.unquote(args.pop(0).rstrip("/"))  # e.g. 'http://server/svn/source/trunk'
    target_url = urllib.unquote(args.pop(0).rstrip("/"))  # e.g. 'file:///svn/target/trunk'
    ui.status("options: %s", str(options), level=ui.DEBUG, color='GREEN')

    # Make sure that both the source and target URL's are valid
    source_info = svnclient.get_svn_info(source_url)
    assert is_child_path(source_url, source_info['repos_url'])
    target_info = svnclient.get_svn_info(target_url)
    assert is_child_path(target_url, target_info['repos_url'])

    # Init global vars
    source_repos_url = source_info['repos_url']       # e.g. 'http://server/svn/source'
    source_base = source_url[len(source_repos_url):]  # e.g. '/trunk'
    source_repos_uuid = source_info['repos_uuid']
    target_repos_url = target_info['repos_url']       # e.g. 'http://server/svn/target'
    target_base = target_url[len(target_repos_url):]  # e.g. '/trunk'

    # Init start and end revision
    try:
        source_start_rev = svnclient.get_svn_rev(source_repos_url, options.rev_start if options.rev_start else 1)
    except ExternalCommandFailed:
        print("Error: Invalid start source revision value: %s" % (options.rev_start))
        sys.exit(1)
    try:
        source_end_rev = svnclient.get_svn_rev(source_repos_url, options.rev_end if options.rev_end else "HEAD")
    except ExternalCommandFailed:
        print("Error: Invalid end source revision value: %s" % (options.rev_end))
        sys.exit(1)
    ui.status("Using source revision range %s:%s", source_start_rev, source_end_rev, level=ui.VERBOSE)

    # TODO: If options.keep_date, should we try doing a "svn propset" on an *existing* revision
    # as a sanity check, so we check if the pre-revprop-change hook script is correctly setup
    # before doing first replay-commit?

    target_rev_last = target_info['revision']  # Last revision # in the target repo
    wc_target = os.path.abspath('_wc_target')
    wc_target_tmp = os.path.abspath('_wc_target_tmp')
    num_entries_proc = 0
    commit_count = 0
    source_rev = None
    target_rev = None

    # Check out a working copy of target_url if needed
    wc_exists = os.path.exists(wc_target)
    if wc_exists and not options.cont_from_break:
        shell.rmtree(wc_target)
        wc_exists = False
    if not wc_exists:
        ui.status("Checking-out _wc_target...", level=ui.VERBOSE)
        svnclient.svn_checkout(target_url, wc_target)
    os.chdir(wc_target)
    if wc_exists:
        # If using an existing WC, make sure it's clean ("svn revert")
        ui.status("Cleaning-up _wc_target...", level=ui.VERBOSE)
        run_svn(["cleanup"])
        full_svn_revert()

    if not options.cont_from_break:
        # Warn user if trying to start (non-continue) into a non-empty target path
        if not options.force_nocont:
            top_paths = run_svn(["list", "-r", "HEAD", svnclient.safe_path(target_url, "HEAD")])
            if len(top_paths) > 0:
                print("Error: Trying to replay (non-continue-mode) into a non-empty target_url location. "
                      "Use --force if you're sure this is what you want.")
                sys.exit(1)
        # Get the first log entry at/after source_start_rev, which is where
        # we'll do the initial import from.
        source_ancestors = find_svn_ancestors(source_repos_url, source_base, source_end_rev, prefix=" ")
        it_log_start = svnclient.iter_svn_log_entries(source_url, source_start_rev, source_end_rev, get_changed_paths=False, ancestors=source_ancestors)
        # Pick the first entry (if any) from the iterator.
        source_start_log = next(iter(it_log_start), None)
        if not source_start_log:
            raise InternalError("Unable to find any matching revisions between %s:%s in source_url: %s" % \
                (source_start_rev, source_end_rev, source_url))

        # This is the revision we will start from for source_url
        source_start_rev = int(source_start_log['revision'])
        ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE)
        ui.status("", level=ui.VERBOSE)
        # Compare against source_start_rev: source_rev is still None at this
        # point, so testing it would never trigger the padding commits.
        if options.keep_revnum and source_start_rev > int(target_rev_last):
            target_rev_last = keep_revnum(source_start_rev, target_rev_last, wc_target_tmp)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_repos_url, source_start_rev, source_start_rev))
        # Export and add file-contents from source_url@source_start_rev
        source_start_url = source_url if not source_ancestors else source_repos_url+source_ancestors[len(source_ancestors)-1]['copyfrom_path']
        top_paths = run_svn(["list", "-r", source_start_rev, svnclient.safe_path(source_start_url, source_start_rev)])
        top_paths = top_paths.strip("\n").split("\n")
        for path in top_paths:
            # For each top-level file/folder...
            if not path:
                continue
            # Directories have a trailing slash in the "svn list" output
            path_is_dir = True if path[-1] == "/" else False
            path_offset = path.rstrip('/') if path_is_dir else path
            if in_svn(path_offset, prefix=" "):
                raise InternalError("Cannot replay history on top of pre-existing structure: %s" % join_path(source_start_url, path_offset))
            if path_is_dir and not os.path.exists(path_offset):
                os.makedirs(path_offset)
            run_svn(["export", "--force", "-r" , source_start_rev, svnclient.safe_path(join_path(source_start_url, path_offset), source_start_rev), svnclient.safe_path(path_offset)])
            run_svn(["add", svnclient.safe_path(path_offset)])
        # Update any properties on the newly added content
        paths = run_svn(["list", "--recursive", "-r", source_start_rev, svnclient.safe_path(source_start_url, source_start_rev)])
        paths = paths.strip("\n").split("\n")
        if options.keep_prop:
            sync_svn_props(source_start_url, source_start_rev, "")
        for path in paths:
            if not path:
                continue
            # Directories have a trailing slash in the "svn list" output
            path_is_dir = True if path[-1] == "/" else False
            path_offset = path.rstrip('/') if path_is_dir else path
            ui.status(" A %s", join_path(source_base, path_offset), level=ui.VERBOSE)
            if options.keep_prop:
                sync_svn_props(source_start_url, source_start_rev, path_offset)
        # Commit the initial import
        num_entries_proc += 1
        target_revprops = gen_tracking_revprops(source_start_rev)  # Build source-tracking revprop's
        target_rev = commit_from_svn_log_entry(source_start_log, target_revprops=target_revprops)
        if target_rev:
            # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
            set_rev_map(source_start_rev, target_rev)
            commit_count += 1
            target_rev_last = target_rev
            if options.verify:
                # Verify the revision we just imported (source_rev is still None here).
                verify_commit(source_start_rev, target_rev_last)
    else:
        # Re-build the rev_map based on any already-replayed history in target_url
        build_rev_map(target_url, target_rev_last, source_info)
        if not rev_map:
            print("Error: Called with continue-mode, but no already-replayed source history found in target_url.")
            sys.exit(1)
        source_start_rev = int(max(rev_map, key=rev_map.get))
        assert source_start_rev
        ui.status("Continuing from source revision %s.", source_start_rev, level=ui.VERBOSE)
        ui.status("", level=ui.VERBOSE)

    # Compare the client version as a (major, minor) tuple. A float comparison
    # would mis-order two-digit minor versions (e.g. "1.10" -> 1.1 < 1.7).
    svn_vers_t = svnclient.get_svn_client_version()
    svn_is_17_plus = tuple(int(v) for v in svn_vers_t[0:2]) >= (1, 7)

    # Load SVN log starting from source_start_rev + 1
    source_ancestors = find_svn_ancestors(source_repos_url, source_base, source_end_rev, prefix=" ")
    it_log_entries = svnclient.iter_svn_log_entries(source_url, source_start_rev+1, source_end_rev, get_revprops=True, ancestors=source_ancestors) if source_start_rev < source_end_rev else []
    source_rev_last = source_start_rev

    try:
        for log_entry in it_log_entries:
            if options.entries_proc_limit:
                if num_entries_proc >= options.entries_proc_limit:
                    break
            # Replay this revision from source_url into target_url
            source_rev = log_entry['revision']
            if options.keep_revnum:
                if source_rev < target_rev_last:
                    print("Error: Last target revision (r%s) is equal-or-higher than starting source revision (r%s). "
                          "Cannot use --keep-revnum mode." % (target_rev_last, source_start_rev))
                    sys.exit(1)
                target_rev_last = keep_revnum(source_rev, target_rev_last, wc_target_tmp)
            disp_svn_log_summary(log_entry)
            # Process all the changed-paths in this log entry
            commit_paths = []
            process_svn_log_entry(log_entry, source_ancestors, commit_paths)
            num_entries_proc += 1
            # Commit any changes made to _wc_target
            target_revprops = gen_tracking_revprops(source_rev)  # Build source-tracking revprop's
            target_rev = commit_from_svn_log_entry(log_entry, commit_paths, target_revprops=target_revprops)
            source_rev_last = source_rev
            if target_rev:
                # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
                set_rev_map(source_rev, target_rev)
                target_rev_last = target_rev
                commit_count += 1
                if options.verify:
                    verify_commit(source_rev, target_rev_last, log_entry)
                # Run "svn cleanup" every 100 commits if SVN 1.7+, to clean-up orphaned ".svn/pristines/*"
                if svn_is_17_plus and (commit_count % 100 == 0):
                    run_svn(["cleanup"])
        if source_rev_last == source_start_rev:
            # If there were no new source_url revisions to process, still trigger
            # "full-mode" verify check (if enabled).
            if options.verify:
                verify_commit(source_rev_last, target_rev_last)

    except KeyboardInterrupt:
        print("\nStopped by user.")
        print("\nCleaning-up...")
        run_svn(["cleanup"])
        full_svn_revert()
    except:
        # Deliberately broad: whatever went wrong, report it and leave the
        # working-copy in a clean state.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        print("\nCleaning-up...")
        run_svn(["cleanup"])
        print(run_svn(["status"]))
        full_svn_revert()
    finally:
        print("\nFinished at source revision %s%s." % (source_rev_last, " (dry-run)" if options.dry_run else ""))
1177
1178 def main():
1179 # Defined as entry point. Must be callable without arguments.
1180 usage = "svn2svn, version %s\n" % str(full_version) + \
1181 "<http://nynim.org/projects/svn2svn> <https://github.com/tonyduckles/svn2svn>\n\n" + \
1182 "Usage: %prog [OPTIONS] source_url target_url\n"
1183 description = """\
1184 Replicate (replay) history from one SVN repository to another. Maintain
1185 logical ancestry wherever possible, so that 'svn log' on the replayed repo
1186 will correctly follow file/folder renames.
1187
1188 Examples:
1189 Create a copy of only /trunk from source repo, starting at r5000
1190 $ svnadmin create /svn/target
1191 $ svn mkdir -m 'Add trunk' file:///svn/target/trunk
1192 $ svn2svn -av -r 5000 http://server/source/trunk file:///svn/target/trunk
1193 1. The target_url will be checked-out to ./_wc_target
1194 2. The first commit to http://server/source/trunk at/after r5000 will be
1195 exported & added into _wc_target
1196 3. All revisions affecting http://server/source/trunk (starting at r5000)
1197 will be replayed to _wc_target. Any add/copy/move/replaces that are
1198 copy-from'd some path outside of /trunk (e.g. files renamed on a
1199 /branch and branch was merged into /trunk) will correctly maintain
1200 logical ancestry where possible.
1201
1202 Use continue-mode (-c) to pick-up where the last run left-off
1203 $ svn2svn -avc http://server/source/trunk file:///svn/target/trunk
1204 1. The target_url will be checked-out to ./_wc_target, if not already
1205 checked-out
1206 2. All new revisions affecting http://server/source/trunk starting from
1207 the last replayed revision to file:///svn/target/trunk (based on the
1208 svn2svn:* revprops) will be replayed to _wc_target, maintaining all
1209 logical ancestry where possible."""
1210 parser = optparse.OptionParser(usage, description=description,
1211 formatter=HelpFormatter(), version="%prog "+str(full_version))
1212 parser.add_option("-v", "--verbose", dest="verbosity", action="count", default=1,
1213 help="enable additional output (use -vv or -vvv for more)")
1214 parser.add_option("-a", "--archive", action="store_true", dest="archive", default=False,
1215 help="archive/mirror mode; same as -UDP (see REQUIRE's below)\n"
1216 "maintain same commit author, same commit time, and file/dir properties")
1217 parser.add_option("-U", "--keep-author", action="store_true", dest="keep_author", default=False,
1218 help="maintain same commit authors (svn:author) as source\n"
1219 "(REQUIRES 'pre-revprop-change' hook script to allow 'svn:author' changes)")
1220 parser.add_option("-D", "--keep-date", action="store_true", dest="keep_date", default=False,
1221 help="maintain same commit time (svn:date) as source\n"
1222 "(REQUIRES 'pre-revprop-change' hook script to allow 'svn:date' changes)")
1223 parser.add_option("-P", "--keep-prop", action="store_true", dest="keep_prop", default=False,
1224 help="maintain same file/dir SVN properties as source")
1225 parser.add_option("-R", "--keep-revnum", action="store_true", dest="keep_revnum", default=False,
1226 help="maintain same rev #'s as source. creates placeholder target "
1227 "revisions (by modifying a 'svn2svn:keep-revnum' property at the root of the target repo)")
1228 parser.add_option("-c", "--continue", action="store_true", dest="cont_from_break",
1229 help="continue from last source commit to target (based on svn2svn:* revprops)")
1230 parser.add_option("-f", "--force", action="store_true", dest="force_nocont",
1231 help="allow replaying into a non-empty target folder")
1232 parser.add_option("-r", "--revision", type="string", dest="revision", metavar="ARG",
1233 help="revision range to replay from source_url\n"
1234 "A revision argument can be one of:\n"
1235 " START start rev # (end will be 'HEAD')\n"
1236 " START:END start and ending rev #'s\n"
1237 "Any revision # formats which SVN understands are "
1238 "supported, e.g. 'HEAD', '{2010-01-31}', etc.")
1239 parser.add_option("-u", "--log-author", action="store_true", dest="log_author", default=False,
1240 help="append source commit author to replayed commit mesages")
1241 parser.add_option("-d", "--log-date", action="store_true", dest="log_date", default=False,
1242 help="append source commit time to replayed commit messages")
1243 parser.add_option("-l", "--limit", type="int", dest="entries_proc_limit", metavar="NUM",
1244 help="maximum number of source revisions to process")
1245 parser.add_option("-n", "--dry-run", action="store_true", dest="dry_run", default=False,
1246 help="process next source revision but don't commit changes to "
1247 "target working-copy (forces --limit=1)")
1248 parser.add_option("-x", "--verify", action="store_const", const=1, dest="verify",
1249 help="verify ancestry and content for changed paths in commit after every target commit or last target commit")
1250 parser.add_option("-X", "--verify-all", action="store_const", const=2, dest="verify",
1251 help="verify ancestry and content for entire target_url tree after every target commit or last target commit")
1252 parser.add_option("--debug", dest="verbosity", const=ui.DEBUG, action="store_const",
1253 help="enable debugging output (same as -vvv)")
1254 global options
1255 options, args = parser.parse_args()
1256 if len(args) != 2:
1257 parser.error("incorrect number of arguments")
1258 if options.verbosity < 10:
1259 # Expand multiple "-v" arguments to a real ui._level value
1260 options.verbosity *= 10
1261 if options.dry_run:
1262 # When in dry-run mode, only try to process the next log_entry
1263 options.entries_proc_limit = 1
1264 options.rev_start = None
1265 options.rev_end = None
1266 if options.revision:
1267 # Reg-ex for matching a revision arg (http://svnbook.red-bean.com/en/1.5/svn.tour.revs.specifiers.html#svn.tour.revs.dates)
1268 rev_patt = '[0-9A-Z]+|\{[0-9A-Za-z/\\ :-]+\}'
1269 rev = None
1270 match = re.match('^('+rev_patt+'):('+rev_patt+')$', options.revision) # First try start:end match
1271 if match is None: match = re.match('^('+rev_patt+')$', options.revision) # Next, try start match
1272 if match is None:
1273 parser.error("unexpected --revision argument format; see 'svn help log' for valid revision formats")
1274 rev = match.groups()
1275 options.rev_start = rev[0] if len(rev)>0 else None
1276 options.rev_end = rev[1] if len(rev)>1 else None
1277 if options.archive:
1278 options.keep_author = True
1279 options.keep_date = True
1280 options.keep_prop = True
1281 ui.update_config(options)
1282 return real_main(args)
1283
1284
if __name__ == "__main__":
    # Script entry point: run the command-line driver and hand its result to
    # the interpreter as the process exit status.  Any falsy result from
    # main() (e.g. None or 0) is reported as exit status 0.
    exit_status = main() or 0
    sys.exit(exit_status)