]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/run/svn2svn.py
Use shell.rmtree
[svn2svn.git] / svn2svn / run / svn2svn.py
1 """
2 Replicate (replay) changesets from one SVN repository to another.
3 """
4
5 from .. import base_version, full_version
6 from .. import ui
7 from .. import shell
8 from .. import svnclient
9 from ..shell import run_svn,run_shell_command
10 from ..errors import (ExternalCommandFailed, UnsupportedSVNAction, InternalError, VerificationError)
11 from parse import HelpFormatter
12 from breakhandler import BreakHandler
13
14 import sys
15 import os
16 import traceback
17 import operator
18 import optparse
19 import re
20 import urllib
21 from datetime import datetime
22
# The known SVN action abbreviations, as reported by "svn log".
_valid_svn_actions = "MARD"

# Module-level variables/parameters. They are all initialised to empty values
# here; the functions in this module read them as globals.
source_url = ""         # URL to source path in source SVN repo, e.g. 'http://server/svn/source/trunk'
source_repos_url = ""   # URL to root of source SVN repo, e.g. 'http://server/svn/source'
source_base = ""        # Relative path of source_url in source SVN repo, e.g. '/trunk'
source_repos_uuid = ""  # UUID of source SVN repo
target_url =""          # URL to target path in target SVN repo, e.g. 'file:///svn/repo_target/trunk'
target_repos_url = ""   # URL to root of target SVN repo, e.g. 'http://server/svn/target'
target_base = ""        # Relative path of target_url in target SVN repo, e.g. '/trunk'
rev_map = {}            # The running mapping-table dictionary for source_url rev #'s -> target_url rev #'s
options = None          # optparse options object (None until assigned elsewhere)
35
def parse_svn_commit_rev(output):
    """
    Extract the new revision number from the text printed by "svn commit".

    Scans 'output' line by line for the first line beginning with
    'Committed revision ' and returns the number that follows it as an int.
    Asserts if no such line is present.
    """
    marker = 'Committed revision '
    matched = None
    for text in output.strip("\n").split("\n"):
        if text.startswith(marker):
            # Drop the trailing "." that svn prints after the revision number
            matched = text[len(marker):].rstrip('.')
            break
    assert matched is not None
    return int(matched)
48
def commit_from_svn_log_entry(log_entry, commit_paths=None, target_revprops=None):
    """
    Given an SVN log entry and an optional list of changed paths, do an svn commit.

    'log_entry' is the source-revision log entry (a dict). This function reads its
      'date', 'message', 'author', 'revprops' and 'revision' keys, plus 'date_raw'
      when options.keep_date is set.
    'commit_paths' is an optional collection of working-copy paths to pass to
      "svn commit". If empty, or if it holds 100 or more entries, no paths are
      passed and the commit runs at the root of the working-copy.
    'target_revprops' is an optional list of {'name': ..., 'value': ...} dicts with
      extra revprops to set on the target commit; they override same-named
      revprops carried forward from the source revision.

    Returns the committed target revision number (int), or None when running in
    dry-run mode or when "svn commit" produced no output.
    Raises KeyboardInterrupt after the commit has finished if the user pressed
    Ctrl-C while it was running.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # Display the _wc_target "svn status" info if running in -vv (or higher) mode
    if ui.get_level() >= ui.EXTRA:
        ui.status(">> commit_from_svn_log_entry: Pre-commit _wc_target status:", level=ui.EXTRA, color='CYAN')
        ui.status(run_svn(["status"]), level=ui.EXTRA, color='CYAN')
    # This will use the local timezone for displaying commit times
    timestamp = int(log_entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    args = ["commit", "--force-log"]
    message = log_entry['message']
    if options.log_date:
        message += "\nDate: " + svn_date
    if options.log_author:
        message += "\nAuthor: " + log_entry['author']
    args += ["-m", message]
    revprops = {}
    if log_entry['revprops']:
        # Carry forward any revprop's from the source revision
        for v in log_entry['revprops']:
            revprops[v['name']] = v['value']
    if target_revprops:
        # Add any extra revprop's we want to set for the target repo commits
        for v in target_revprops:
            revprops[v['name']] = v['value']
    for key in revprops:
        args += ["--with-revprop", "%s=%s" % (key, str(revprops[key]))]
    if commit_paths and len(commit_paths) < 100:
        # If we don't have an excessive amount of individual changed paths, pass
        # those to the "svn commit" command. Else, pass nothing so we commit at
        # the root of the working-copy.
        args += list(commit_paths)
    rev_num = None
    if not options.dry_run:
        # Use BreakHandler class to temporarily redirect SIGINT handler, so that
        # "svn commit" + post-commit rev-prop updating is a quasi-atomic unit.
        # If user presses Ctrl-C during this, wait until after this full action
        # has finished before raising the KeyboardInterrupt exception.
        bh = BreakHandler()
        bh.enable()
        try:
            # Run the "svn commit" command, and screen-scrape the target_rev value (if any)
            output = run_svn(args)
            rev_num = parse_svn_commit_rev(output) if output else None
            if rev_num is not None:
                if options.keep_date:
                    run_svn(["propset", "--revprop", "-r", rev_num, "svn:date", log_entry['date_raw']])
                if options.keep_author:
                    run_svn(["propset", "--revprop", "-r", rev_num, "svn:author", log_entry['author']])
                ui.status("Committed revision %s (source r%s).", rev_num, log_entry['revision'])
        finally:
            # Always restore the normal SIGINT handling, even if one of the svn
            # commands above failed; otherwise Ctrl-C would stay redirected.
            bh.disable()
        # Check if the user tried to press Ctrl-C
        if bh.trapped:
            raise KeyboardInterrupt
    return rev_num
110
def _shell_quote(arg):
    """
    Quote a string so that a POSIX shell treats it as one literal word.
    """
    # Inside single quotes nothing is special except the single quote itself,
    # which has to be closed, escaped and re-opened.
    return "'" + arg.replace("'", "'\\''") + "'"

def _svn_cat_md5(url, rev, strip_trailing_crlf=False):
    """
    Return the "md5sum" output for the content of 'url' at revision 'rev'.

    'rev' is used both as the operative revision (-r) and as the peg revision.
    If 'strip_trailing_crlf' is set, a single trailing "\\r\\n" is removed from the
    content before it is hashed.
    """
    content_filter = " | perl -i -p0777we's/\\r\\n\\z//'" if strip_trailing_crlf else ""
    return run_shell_command("svn cat -r %s %s%s | md5sum" % (rev, _shell_quote(url+"@"+str(rev)), content_filter))

def _svn_list_files(url, rev):
    """
    Return the paths (relative to 'url') of all files found by a recursive
    "svn list" of 'url' at revision 'rev'.
    """
    output = run_svn(["list", "--recursive", "-r", rev, url+"@"+str(rev)])
    # Directories have a trailing slash in the "svn list" output; only keep files.
    return [p for p in output.strip("\n").split("\n") if p and not p.endswith("/")]

def verify_commit(source_rev, target_rev, log_entry=None):
    """
    Compare the ancestry/content/properties between source_url vs target_url
    for a given revision.

    'source_rev' is the source-repo revision which was just replayed.
    'target_rev' is the matching revision in the target repo.
    'log_entry' is the source "svn log" entry for source_rev. It is only used
      when options.verify == 1 (only-changed mode), to find the changed paths.

    With options.verify == 1 only the paths changed in 'log_entry' are checked;
    with options.verify == 2 every file under source_url is checked, and the
    file lists of source and target are compared.

    Raises VerificationError if any mismatch was found.
    """
    error_cnt = 0
    # Gather the offsets in the source repo to check
    check_paths = []
    remove_paths = []
    # TODO: Need to make this ancestry aware
    if options.verify == 1 and log_entry is not None:  # Changed only
        ui.status("Verifying source revision %s (only-changed)...", source_rev, level=ui.VERBOSE)
        for d in log_entry['changed_paths']:
            path = d['path']
            if not is_child_path(path, source_base):
                continue
            if d['kind'] == "":
                d['kind'] = svnclient.get_kind(source_repos_url, path, source_rev, d['action'], log_entry['changed_paths'])
            assert (d['kind'] == 'file') or (d['kind'] == 'dir')
            path_offset = path[len(source_base):].strip("/")
            if d['action'] == 'D':
                remove_paths.append(path_offset)
                continue
            if path_offset in check_paths:
                continue
            ui.status("verify_commit: path [mode=changed]: kind=%s: %s", d['kind'], path, level=ui.DEBUG, color='YELLOW')
            if d['kind'] == 'file':
                ui.status(" verify_commit [mode=changed]: check_paths.append('%s')", path_offset, level=ui.DEBUG, color='GREEN')
                check_paths.append(path_offset)
            elif d['action'] in 'AR':
                # Added/replaced directory: check every file below it
                for child_path_offset in _svn_list_files(source_url.rstrip("/")+"/"+path_offset, source_rev):
                    working_path = (path_offset+"/" if path_offset else "") + child_path_offset
                    if working_path not in check_paths:
                        ui.status(" verify_commit [mode=changed]: check_paths.append('%s'+'/'+'%s')", path_offset, child_path_offset, level=ui.DEBUG, color='GREEN')
                        check_paths.append(working_path)
    if options.verify == 2:  # All paths
        ui.status("Verifying source revision %s (all)...", source_rev, level=ui.VERBOSE)
        for child_path_offset in _svn_list_files(source_url, source_rev):
            ui.status("verify_commit [mode=all]: check_paths.append('%s')", child_path_offset, level=ui.DEBUG, color='GREEN')
            check_paths.append(child_path_offset)

    # If there were any paths deleted in the last revision (options.verify=1 mode),
    # check that they were correctly deleted.
    if remove_paths:
        count_total = len(remove_paths)
        count = 0
        for path_offset in remove_paths:
            count += 1
            count_str = str(count).rjust(len(str(count_total)))
            if in_svn(path_offset):
                ui.status(" (%s/%s) Verify path: FAIL: %s", count_str, count_total, path_offset, level=ui.EXTRA, color='RED')
                ui.status("VerificationError: Path removed in source rev r%s, but still exists in target WC: %s", source_rev, path_offset, color='RED')
                error_cnt += 1
            else:
                ui.status(" (%s/%s) Verify remove: OK: %s", count_str, count_total, path_offset, level=ui.EXTRA)

    # Compare each of the check_path entries between source vs. target
    if check_paths:
        # The first source_rev we replayed into target. Guard against an empty
        # rev_map, for which min() would raise ValueError.
        source_rev_first = (int(min(rev_map, key=rev_map.get)) if rev_map else 0) or 1
        ui.status("verify_commit: source_rev_first:%s", source_rev_first, level=ui.DEBUG, color='YELLOW')
        count_total = len(check_paths)
        count = 0
        for path_offset in check_paths:
            count += 1
            count_str = str(count).rjust(len(str(count_total)))
            # Remember the error count so we only report "OK" for this path if
            # none of the checks below failed.
            error_cnt_before = error_cnt
            if count % 500 == 0:
                ui.status("...processed %s (%s of %s)..." % (count, count, count_total), level=ui.VERBOSE)
            ui.status("verify_commit: path_offset:%s", path_offset, level=ui.DEBUG, color='YELLOW')
            source_log_entries = svnclient.run_svn_log(source_url.rstrip("/")+"/"+path_offset+"@"+str(source_rev), source_rev, 1, source_rev-source_rev_first+1)
            target_log_entries = svnclient.run_svn_log(target_url.rstrip("/")+"/"+path_offset+"@"+str(target_rev), target_rev, 1, target_rev)
            # Build a list of commits in source_log_entries which matches our
            # target path_offset.
            working_path = source_base+"/"+path_offset
            source_revs = []
            for src_entry in source_log_entries:
                source_rev_tmp = src_entry['revision']
                if source_rev_tmp < source_rev_first:
                    # Only process source revisions which have been replayed into target
                    break
                changed_paths_temp = []
                for d in src_entry['changed_paths']:
                    path = d['path']
                    # Match working_path or any parents
                    if is_child_path(working_path, path):
                        ui.status(" verify_commit: changed_path: %s %s@%s (parent:%s)", d['action'], path, source_rev_tmp, working_path, level=ui.DEBUG, color='YELLOW')
                        changed_paths_temp.append({'path': path, 'data': d})
                assert changed_paths_temp
                # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
                changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
                # Find the action for our working_path in this revision. Use a loop to check in reverse order,
                # so that if the target file/folder is "M" but has a parent folder with an "A" copy-from
                # then we follow that copy-from.
                working_path_next = working_path
                match_d = {}
                for v in changed_paths:
                    d = v['data']
                    if not match_d:
                        match_d = d
                    if d['action'] not in _valid_svn_actions:
                        raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                            % (src_entry['revision'], d['action']))
                    if d['action'] in 'AR' and d['copyfrom_revision']:
                        # If we found a copy-from action for a parent path, adjust our
                        # working_path to follow the rename/copy-from, just like find_svn_ancestors().
                        # d['path'] is working_path or one of its parents, so only swap that
                        # leading prefix (str.replace would rewrite every occurrence).
                        working_path_next = d['copyfrom_path'] + working_path[len(d['path']):]
                        match_d = d
                        break
                if is_child_path(working_path, source_base):
                    # Only add source_rev's where the path changed in this revision was a child
                    # of source_base, so that we silently ignore any history that happened on
                    # non-source_base paths (e.g. ignore branch history if we're only replaying trunk).
                    is_diff = False
                    d = match_d
                    if d['action'] == 'M':
                        # For action="M", we need to throw out cases where the only change was to
                        # a property which we ignore, e.g. "svn:mergeinfo".
                        if d['kind'] == "":
                            d['kind'] = svnclient.get_kind(source_repos_url, working_path, src_entry['revision'], d['action'], src_entry['changed_paths'])
                        assert (d['kind'] == 'file') or (d['kind'] == 'dir')
                        if d['kind'] == 'file':
                            # Check for file-content changes
                            # TODO: This should be made ancestor-aware, since the file won't always be at the same path in rev-1
                            sum1 = _svn_cat_md5(source_repos_url+working_path, source_rev_tmp)
                            sum2 = _svn_cat_md5(source_repos_url+working_path_next, source_rev_tmp-1)
                            is_diff = sum1 != sum2
                        if not is_diff:
                            # Check for property changes
                            props1 = svnclient.get_all_props(source_repos_url+working_path, source_rev_tmp)
                            props2 = svnclient.get_all_props(source_repos_url+working_path_next, source_rev_tmp-1)
                            # Ignore changes to "svn:mergeinfo", since we don't copy that
                            if 'svn:mergeinfo' in props1: del props1['svn:mergeinfo']
                            if 'svn:mergeinfo' in props2: del props2['svn:mergeinfo']
                            # Different if any property was added, removed or changed
                            is_diff = any(prop not in props2 or props1[prop] != props2[prop] for prop in props1) or \
                                      any(prop not in props1 for prop in props2)
                        if not is_diff:
                            ui.status(" verify_commit: skip %s@%s", working_path, source_rev_tmp, level=ui.DEBUG, color='GREEN_B', bold=True)
                    else:
                        is_diff = True
                    if is_diff:
                        ui.status(" verify_commit: source_revs.append(%s), working_path:%s", source_rev_tmp, working_path, level=ui.DEBUG, color='GREEN_B')
                        source_revs.append({'path': working_path, 'revision': source_rev_tmp})
                working_path = working_path_next
            # Build a list of all the target commits "svn log" returned
            target_revs = []
            target_revs_rmndr = []
            for tgt_entry in target_log_entries:
                target_rev_tmp = tgt_entry['revision']
                ui.status(" verify_commit: target_revs.append(%s)", target_rev_tmp, level=ui.DEBUG, color='GREEN_B')
                target_revs.append(target_rev_tmp)
                target_revs_rmndr.append(target_rev_tmp)
            # Compare the two lists
            for d in source_revs:
                working_path = d['path']
                source_rev_tmp = d['revision']
                target_rev_tmp = get_rev_map(source_rev_tmp, " ")
                working_offset = working_path[len(source_base):].strip("/")
                ui.status(" verify_commit: %s: source=%s target=%s", working_offset, source_rev_tmp, target_rev_tmp, level=ui.DEBUG, color='GREEN')
                if not target_rev_tmp:
                    ui.status(" (%s/%s) Verify path: FAIL: %s", count_str, count_total, path_offset, level=ui.EXTRA, color='RED')
                    ui.status("VerificationError: Unable to find corresponding target_rev for source_rev r%s in rev_map (path_offset='%s')", source_rev_tmp, path_offset, color='RED')
                    error_cnt += 1
                    continue
                if target_rev_tmp not in target_revs:
                    # If found a source_rev with no equivalent target_rev in target_revs,
                    # check if the only difference in source_rev vs. source_rev-1 is the
                    # removal/addition of a trailing newline char, since this seems to get
                    # stripped-out sometimes during the replay (via "svn export"?).
                    # Strip any trailing \r\n from file-content (http://stackoverflow.com/a/1656218/346778)
                    sum1 = _svn_cat_md5(source_repos_url+working_path, source_rev_tmp, strip_trailing_crlf=True)
                    sum2 = _svn_cat_md5(source_repos_url+working_path, source_rev_tmp-1, strip_trailing_crlf=True)
                    if sum1 != sum2:
                        ui.status(" (%s/%s) Verify path: FAIL: %s", count_str, count_total, path_offset, level=ui.EXTRA, color='RED')
                        ui.status("VerificationError: Found source_rev (r%s) with no corresponding target_rev: path_offset='%s'", source_rev_tmp, path_offset, color='RED')
                        error_cnt += 1
                    # Either way there is no entry to take out of target_revs_rmndr
                    continue
                target_revs_rmndr.remove(target_rev_tmp)
            if target_revs_rmndr:
                rmndr_list = ", ".join(map(str, target_revs_rmndr))
                ui.status(" (%s/%s) Verify path: FAIL: %s", count_str, count_total, path_offset, level=ui.EXTRA, color='RED')
                ui.status("VerificationError: Found one or more *extra* target_revs: path_offset='%s', target_revs='%s'", path_offset, rmndr_list, color='RED')
                error_cnt += 1
            if error_cnt == error_cnt_before:
                ui.status(" (%s/%s) Verify path: OK: %s", count_str, count_total, path_offset, level=ui.EXTRA)

    # Ensure there are no "extra" files in the target side
    if options.verify == 2:
        target_paths = _svn_list_files(target_url, target_rev)
        # Use sets for the membership tests; the lists keep the reporting order.
        check_paths_set = set(check_paths)
        target_paths_set = set(target_paths)
        for path_offset in target_paths:
            if path_offset not in check_paths_set:
                ui.status("VerificationError: Path exists in target (@%s) but not source (@%s): %s", target_rev, source_rev, path_offset, color='RED')
                error_cnt += 1
        for path_offset in check_paths:
            if path_offset not in target_paths_set:
                ui.status("VerificationError: Path exists in source (@%s) but not target (@%s): %s", source_rev, target_rev, path_offset, color='RED')
                error_cnt += 1

    if error_cnt > 0:
        raise VerificationError("Found %s verification errors" % (error_cnt))
    ui.status("Verified revision %s (%s).", target_rev, "all" if options.verify == 2 else "only-changed")
351
def full_svn_revert():
    """
    Do an "svn revert" and proactively remove any extra files in the working copy.

    After reverting, every path which "svn status" reports as unversioned ("?")
    is deleted from disk: directories recursively, files and symlinks directly.
    """
    run_svn(["revert", "--recursive", "."])
    output = run_svn(["status"])
    if not output:
        return
    for line in output.strip("\n").split("\n"):
        # startswith() is also safe for blank lines, unlike line[0]
        if not line.startswith("?"):
            continue
        path = line[4:].strip(" ")
        if os.path.isdir(path) and not os.path.islink(path):
            shell.rmtree(path)
        elif os.path.isfile(path) or os.path.islink(path):
            # Remove symlinks themselves (even when broken or pointing at a
            # directory) rather than recursing into whatever they point at.
            os.remove(path)
367
def gen_tracking_revprops(source_rev):
    """
    Return the list of svn2svn source-tracking revprops for a commit which
    replays 'source_rev': the source repo UUID, the source URL and the
    source revision, each as a {'name': ..., 'value': ...} dict.
    """
    tracking = (
        ('svn2svn:source_uuid', source_repos_uuid),
        ('svn2svn:source_url', source_url),
        ('svn2svn:source_rev', source_rev),
    )
    return [{'name': name, 'value': value} for name, value in tracking]
376
def sync_svn_props(source_url, source_rev, path_offset):
    """
    Make the properties of 'path_offset' in the target WC match the properties
    of the same path under 'source_url' at 'source_rev'.

    Properties missing from the source are removed from the target, and
    properties which are new or have a different value are set. The source's
    "svn:mergeinfo" is never carried forward.
    """
    wanted = svnclient.get_all_props(join_path(source_url, path_offset), source_rev)
    current = svnclient.get_all_props(path_offset)
    # Never carry-forward "svn:mergeinfo"
    if 'svn:mergeinfo' in wanted:
        del wanted['svn:mergeinfo']
    # First drop whatever the target has but the source doesn't...
    for name in current:
        if name in wanted:
            continue
        run_svn(["propdel", name, path_offset])
    # ...then add or update whatever is missing or different in the target.
    for name in wanted:
        if name in current and wanted[name] == current[name]:
            continue
        run_svn(["propset", name, wanted[name], path_offset])
397
def in_svn(p, require_in_repo=False, prefix=""):
    """
    Return whether the file/folder 'p' is tracked by Subversion, based on the
    first "svn status" entry for it (no entries at all means not tracked).

    Looking for ".svn" directories only worked prior to SVN 1.7; with WC-NG
    there is just a single top-level ".svn", so "svn status" is used instead.

    'require_in_repo' makes locally-added paths (status 'added', or no
      revision) count as not tracked.
    'prefix' is prepended to the debug message.
    """
    status_entries = svnclient.get_svn_status(p, no_recursive=True)
    if not status_entries:
        return False
    info = status_entries[0]
    if require_in_repo and (info['status'] == 'added' or info['revision'] is None):
        # Caller requires the path to be in the SVN repo, so a path that is
        # only locally-added doesn't count.
        tracked = False
    elif info['status'] == 'deleted':
        # Paths tracked as deleted in the WC are not under source-control.
        tracked = False
    else:
        # Locally added/copied paths do count as under source-control.
        tracked = info['type'] == 'normal' or info['status'] == 'added' or info['copied'] == 'true'
    ui.status(prefix + ">> in_svn('%s', require_in_repo=%s) --> %s", p, str(require_in_repo), str(tracked), level=ui.DEBUG, color='GREEN')
    return tracked
419
def is_child_path(path, p_path):
    """
    Return True if 'path' is 'p_path' itself or lies somewhere below it.
    """
    if path == p_path:
        return True
    # Require the "/" separator so that e.g. '/trunk2' is not a child of '/trunk'
    return path.startswith(p_path + "/")
422
def join_path(base, child):
    """
    Join 'child' onto 'base' with exactly one "/" between them.

    Any trailing slashes on 'base' are dropped before joining. If 'child' is
    empty, 'base' is returned unchanged.
    """
    if not child:
        return base
    # str.rstrip() returns a new string, so its result has to be used here.
    return base.rstrip('/') + "/" + child
426
def find_svn_ancestors(svn_repos_url, start_path, start_rev, stop_base_path=None, prefix=""):
    """
    Given an initial starting path+rev, walk the SVN history backwards to inspect the
    ancestry of that path, optionally seeing if it traces back to stop_base_path.

    Build an array of copyfrom_path and copyfrom_revision pairs for each of the "svn copy"'s.
    If we find a copyfrom_path which is stop_base_path or a child of it (e.g. we crawled
    back to the initial branch-copy from trunk), then return the collection of ancestor
    paths. Otherwise, copyfrom_path has no ancestry compared to stop_base_path.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'svn_repos_url' is the full URL to the root of the SVN repository,
      e.g. 'file:///path/to/repo'
    'start_path' is the path in the SVN repo to the source path to start checking
      ancestry at, e.g. '/branches/fix1/projectA/file1.txt'.
    'start_rev' is the revision to start walking the history of start_path backwards from.
    'stop_base_path' is the path in the SVN repo to stop tracing ancestry once we've reached,
      i.e. the target path we're trying to trace ancestry back to, e.g. '/trunk'.
    'prefix' is prepended to every debug message.

    Returns a list of dicts with the keys 'path', 'revision', 'copyfrom_path' and
    'copyfrom_rev', newest copy first. The list is empty if no copy was found, or if
    stop_base_path was given and the chain ended in a delete or a plain add/replace.
    Raises UnsupportedSVNAction for an unknown action code in the log.
    """
    ui.status(prefix + ">> find_svn_ancestors: Start: (%s) start_path: %s stop_base_path: %s",
        svn_repos_url, start_path+"@"+str(start_rev), stop_base_path, level=ui.DEBUG, color='YELLOW')
    done = False
    no_ancestry = False
    cur_path = start_path
    cur_rev = start_rev
    first_iter_done = False
    ancestors = []
    while not done:
        # Get the first "svn log" entry for cur_path (relative to @cur_rev)
        ui.status(prefix + ">> find_svn_ancestors: %s", svn_repos_url+cur_path+"@"+str(cur_rev), level=ui.DEBUG, color='YELLOW')
        log_entry = svnclient.get_first_svn_log_entry(svn_repos_url+cur_path, 1, cur_rev)
        if not log_entry:
            ui.status(prefix + ">> find_svn_ancestors: Done: no log_entry", level=ui.DEBUG, color='YELLOW')
            break
        # If we found a copy-from case which matches our stop_base_path, we're done.
        # ...but only if we've at least tried to search for the first copy-from path.
        if stop_base_path is not None and first_iter_done and is_child_path(cur_path, stop_base_path):
            ui.status(prefix + ">> find_svn_ancestors: Done: Found is_child_path(cur_path, stop_base_path) and first_iter_done=True", level=ui.DEBUG, color='YELLOW')
            break
        first_iter_done = True
        # Search for any actions on our target path (or parent paths).
        changed_paths_temp = [{'path': d['path'], 'data': d}
                              for d in log_entry['changed_paths']
                              if is_child_path(cur_path, d['path'])]
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the ancestry-chain.
            ui.status(prefix + ">> find_svn_ancestors: Done: No matching changed_paths", level=ui.DEBUG, color='YELLOW')
            break
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
        # Find the action for our cur_path in this revision. Use a loop to check in reverse order,
        # so that if the target file/folder is "M" but has a parent folder with an "A" copy-from
        # then we still correctly match the deepest copy-from.
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            action = d['action']
            if action not in _valid_svn_actions:
                raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                    % (log_entry['revision'], action))
            ui.status(prefix + "> %s %s%s", action, path,
                (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                level=ui.DEBUG, color='YELLOW')
            if action == 'D':
                # If file/folder was deleted, ancestry-chain stops here
                if stop_base_path:
                    no_ancestry = True
                ui.status(prefix + ">> find_svn_ancestors: Done: deleted", level=ui.DEBUG, color='YELLOW')
                done = True
                break
            if action in 'RA':
                # If file/folder was added/replaced but not a copy, ancestry-chain stops here
                if not d['copyfrom_path']:
                    if stop_base_path:
                        no_ancestry = True
                    ui.status(prefix + ">> find_svn_ancestors: Done: %s with no copyfrom_path",
                        "Added" if action == "A" else "Replaced",
                        level=ui.DEBUG, color='YELLOW')
                    done = True
                    break
                # Else, file/folder was added/replaced and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                ui.status(prefix + ">> find_svn_ancestors: Found copy-from (action=%s): %s --> %s",
                    action, path, d['copyfrom_path']+"@"+str(d['copyfrom_revision']),
                    level=ui.DEBUG, color='YELLOW')
                # 'path' is cur_path or one of its parents, so only swap that leading
                # prefix (str.replace would rewrite every occurrence within cur_path).
                copyfrom_path = d['copyfrom_path'] + cur_path[len(path):]
                ancestors.append({'path': cur_path, 'revision': log_entry['revision'],
                    'copyfrom_path': copyfrom_path, 'copyfrom_rev': d['copyfrom_revision']})
                cur_path = copyfrom_path
                cur_rev = d['copyfrom_revision']
                # Follow the copy and keep on searching
                break
    if stop_base_path and no_ancestry:
        # If we're tracing back ancestry to a specific target stop_base_path and
        # the ancestry-chain stopped before we reached stop_base_path, then return
        # nothing since there is no ancestry chaining back to that target.
        ancestors = []
    if ancestors:
        if ui.get_level() >= ui.DEBUG:
            # Pad the "path@rev" column so that the arrows line up
            max_len = max(len(d['path']+"@"+str(d['revision'])) for d in ancestors)
            ui.status(prefix + ">> find_svn_ancestors: Found parent ancestors:", level=ui.DEBUG, color='YELLOW_B')
            for idx, d in enumerate(ancestors):
                ui.status(prefix + " [%s] %s --> %s", idx,
                    str(d['path']+"@"+str(d['revision'])).ljust(max_len),
                    str(d['copyfrom_path']+"@"+str(d['copyfrom_rev'])),
                    level=ui.DEBUG, color='YELLOW')
    else:
        ui.status(prefix + ">> find_svn_ancestors: No ancestor-chain found: %s",
            svn_repos_url+start_path+"@"+str(start_rev), level=ui.DEBUG, color='YELLOW')
    return ancestors
548
def get_rev_map(source_rev, prefix):
    """
    Translate a source-repo revision number into the matching target-repo
    revision number, using the module-level rev_map.

    If 'source_rev' itself has no entry, the nearest lower source revision
    which does have one is used. Returns None if there is no such entry.
    'prefix' is prepended to every debug message.
    """
    ui.status(prefix + ">> get_rev_map(%s)", source_rev, level=ui.DEBUG, color='GREEN')
    # Walk downwards to the highest entry less-than-or-equal-to source_rev
    rev = int(source_rev)
    while rev > 0:
        found = rev in rev_map
        ui.status(prefix + ">> get_rev_map: rev=%s in_rev_map=%s", rev, str(found), level=ui.DEBUG, color='BLACK_B')
        if found:
            return int(rev_map[rev])
        rev -= 1
    # Else, we fell off the bottom of the rev_map. Ruh-roh...
    return None
562
def set_rev_map(source_rev, target_rev):
    """
    Record in the module-level rev_map that 'source_rev' was replayed as
    'target_rev'. Both values are stored as ints.
    """
    global rev_map
    src = int(source_rev)
    tgt = int(target_rev)
    rev_map[src] = tgt
567
def build_rev_map(target_url, target_end_rev, source_info):
    """
    Check for any already-replayed history from source_url (source_info) and
    build the mapping-table of source_rev -> target_rev.

    'target_url' is the target URL whose log is scanned, from revision 1
      up to 'target_end_rev'.
    'source_info' is a dict; its 'repos_uuid' and 'url' values are compared
      against the "svn2svn:source_uuid" / "svn2svn:source_url" revprops of each
      target revision to decide whether that revision was replayed from our source.

    The module-level rev_map is reset and then filled in-place.
    """
    global rev_map
    rev_map = {}
    ui.status("Rebuilding target_rev -> source_rev rev_map...", level=ui.VERBOSE)
    proc_count = 0
    it_log_entries = svnclient.iter_svn_log_entries(target_url, 1, target_end_rev, get_changed_paths=False, get_revprops=True)
    for log_entry in it_log_entries:
        if not log_entry['revprops']:
            continue
        revprops = {}
        for v in log_entry['revprops']:
            if v['name'].startswith('svn2svn:'):
                revprops[v['name']] = v['value']
        # Use .get() so that a revision which only carries some of the svn2svn:*
        # revprops is skipped instead of raising KeyError.
        if 'svn2svn:source_rev' in revprops and \
           revprops.get('svn2svn:source_uuid') == source_info['repos_uuid'] and \
           revprops.get('svn2svn:source_url') == source_info['url']:
            source_rev = revprops['svn2svn:source_rev']
            target_rev = log_entry['revision']
            set_rev_map(source_rev, target_rev)
            proc_count += 1
            if proc_count % 500 == 0:
                ui.status("...processed %s (%s of %s)..." % (proc_count, target_rev, target_end_rev), level=ui.VERBOSE)
593
def get_svn_dirlist(svn_path, rev_number = ""):
    """
    Return the lines printed by "svn list" for the given folder path.

    If 'rev_number' is given it is used both as the operative revision (-r)
    and as the peg revision of 'svn_path'. The command is run with
    no_fail=True; an output of at most one character gives an empty list.
    NOTE(review): "--recursive" is not passed here, so presumably only the
    direct children are listed -- confirm against the callers.
    """
    if rev_number:
        args = ["list", "-r", rev_number, svn_path + "@" + str(rev_number)]
    else:
        args = ["list", svn_path]
    output = run_svn(args, no_fail=True)
    if len(output) > 1:
        return output.strip("\n").split("\n")
    return []
607
def path_in_list(paths, path):
    """
    Return True if 'path' equals, or is a child of, any entry in 'paths'.
    """
    return any(is_child_path(path, candidate) for candidate in paths)
613
def add_path(paths, path):
    """
    Append 'path' to the list 'paths', unless 'path' itself or one of its
    parent folders is already in the list.
    """
    if path_in_list(paths, path):
        return
    paths.append(path)
617
def in_ancestors(ancestors, ancestor):
    """
    Check 'ancestor' against the list 'ancestors'.

    Walking 'ancestors' from its last entry backwards, the first entry whose
    revision is greater than ancestor['revision'] decides the result: True if
    ancestor['path'] is that entry's path or a child of it. If no entry
    qualifies, the result is True.
    NOTE(review): the entry at index 0 is never examined -- confirm whether
    that is intended.
    """
    idx = len(ancestors) - 1
    while idx > 0:
        candidate = ancestors[idx]
        if int(candidate['revision']) > ancestor['revision']:
            return is_child_path(ancestor['path'], candidate['path'])
        idx -= 1
    return True
625
def do_svn_add(source_url, path_offset, source_rev, source_ancestors,
               parent_copyfrom_path="", parent_copyfrom_rev="",
               export_paths=None, is_dir=False, skip_paths=None, prefix=""):
    """
    Given the add'd source path, replay the "svn add/copy" commands to correctly
    track renames across copy-from's.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2  (rename folder)
    4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt
       (rename file inside renamed folder)
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'source_url' is the source URL being replayed; source_base is derived from
      it by stripping the module-level source_repos_url.
    'path_offset' is the offset from source_base to the file to check ancestry for,
      e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'source_ancestors' is the ancestor chain which a found ancestor is checked
      against via in_ancestors().
    'parent_copyfrom_path' and 'parent_copyfrom_rev' is the copy-from path of the parent
      directory, when being called recursively by do_svn_add_dir().
    'export_paths' is the list of path_offset's that we've deferred running "svn export" on.
      It is appended to in place; when omitted, a fresh list is used.
    'is_dir' is whether path_offset is a directory (rather than a file).
    'skip_paths' is the list of path_offset's which do_svn_add_dir() must not
      recurse into; when omitted, a fresh empty list is used.
    'prefix' is prepended to the debug status messages.
    """
    # Never use mutable objects as default argument values: they are shared
    # between calls. (The old export_paths default was also a dict, which
    # add_path() cannot append to.)
    if export_paths is None:
        export_paths = []
    if skip_paths is None:
        skip_paths = []
    source_base = source_url[len(source_repos_url):]  # e.g. '/trunk'
    ui.status(prefix + ">> do_svn_add: %s %s", join_path(source_base, path_offset)+"@"+str(source_rev),
              " (parent-copyfrom: "+parent_copyfrom_path+"@"+str(parent_copyfrom_rev)+")" if parent_copyfrom_path else "",
              level=ui.DEBUG, color='GREEN')
    # Check if the given path has ancestors which chain back to the current source_base
    ancestors = find_svn_ancestors(source_repos_url, join_path(source_base, path_offset), source_rev, stop_base_path=source_base, prefix=prefix+" ")
    # Choose the eldest ancestor, i.e. where we reached stop_base_path=source_base
    ancestor = ancestors[len(ancestors)-1] if ancestors else None
    if ancestor and not in_ancestors(source_ancestors, ancestor):
        ancestor = None
    copyfrom_path = ancestor['copyfrom_path'] if ancestor else ""
    copyfrom_rev = ancestor['copyfrom_rev'] if ancestor else ""
    found_ancestor = bool(ancestor)
    tgt_rev = None
    if found_ancestor:
        # The copy-from path has ancestry back to source_url.
        ui.status(prefix + ">> do_svn_add: Check copy-from: Found parent: %s", copyfrom_path+"@"+str(copyfrom_rev),
                  level=ui.DEBUG, color='GREEN', bold=True)
        # Map the copyfrom_rev (source repo) to the equivalent target repo rev #. This can
        # return None in the case where copyfrom_rev is *before* our source_start_rev.
        tgt_rev = get_rev_map(copyfrom_rev, prefix+" ")
        ui.status(prefix + ">> do_svn_add: get_rev_map: %s (source) -> %s (target)", copyfrom_rev, tgt_rev, level=ui.DEBUG, color='GREEN')
    else:
        ui.status(prefix + ">> do_svn_add: Check copy-from: No ancestor chain found.", level=ui.DEBUG, color='GREEN')
    if found_ancestor and tgt_rev:
        # Check if this path_offset in the target WC already has this ancestry, in which
        # case there's no need to run the "svn copy" (again).
        path_in_svn = in_svn(path_offset, prefix=prefix+" ")
        log_entry = svnclient.get_last_svn_log_entry(path_offset, 1, 'HEAD', get_changed_paths=False) if in_svn(path_offset, require_in_repo=True, prefix=prefix+" ") else []
        if (not log_entry or (log_entry['revision'] != tgt_rev)):
            copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
            ui.status(prefix + ">> do_svn_add: svn_copy: Copy-from: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " copyfrom: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " p_copyfrom: %s", parent_copyfrom_path+"@"+str(parent_copyfrom_rev) if parent_copyfrom_path else "", level=ui.DEBUG, color='GREEN')
            if path_in_svn and \
               ((parent_copyfrom_path and is_child_path(copyfrom_path, parent_copyfrom_path)) and \
                (parent_copyfrom_rev and copyfrom_rev == parent_copyfrom_rev)):
                # When being called recursively, if this child entry has the same ancestor as
                # the parent, then no need to try to run another "svn copy".
                ui.status(prefix + ">> do_svn_add: svn_copy: Same ancestry as parent: %s",
                          parent_copyfrom_path+"@"+str(parent_copyfrom_rev), level=ui.DEBUG, color='GREEN')
            else:
                # Copy this path from the equivalent path+rev in the target repo, to create the
                # equivalent history.
                if parent_copyfrom_path:
                    # If we have a parent copy-from path, we mis-match that so display a status
                    # message describing the action we're mimic'ing. If path_in_svn, then this
                    # is logically a "replace" rather than an "add".
                    ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), join_path(source_base, path_offset), ancestors[0]['copyfrom_path']+"@"+str(copyfrom_rev), level=ui.VERBOSE)
                if path_in_svn:
                    # If local file is already under version-control, then this is a replace.
                    ui.status(prefix + ">> do_svn_add: pre-copy: local path already exists: %s", path_offset, level=ui.DEBUG, color='GREEN')
                    run_svn(["update", path_offset])
                    run_svn(["remove", "--force", path_offset])
                run_svn(["copy", "-r", tgt_rev, join_path(target_url, copyfrom_offset)+"@"+str(tgt_rev), path_offset])
                if is_dir:
                    # Export the final version of all files in this folder.
                    add_path(export_paths, path_offset)
                else:
                    # Export the final version of this file.
                    run_svn(["export", "--force", "-r", source_rev,
                             source_repos_url+join_path(source_base, path_offset)+"@"+str(source_rev), path_offset])
                if options.keep_prop:
                    sync_svn_props(source_url, source_rev, path_offset)
        else:
            ui.status(prefix + ">> do_svn_add: Skipped 'svn copy': %s", path_offset, level=ui.DEBUG, color='GREEN')
    else:
        # Else, either this copy-from path has no ancestry back to source_url OR copyfrom_rev comes
        # before our initial source_start_rev (i.e. tgt_rev == None), so can't do a "svn copy".
        # Create (parent) directory if needed.
        # TODO: This is (nearly) a duplicate of code in process_svn_log_entry(). Should this be
        #       split-out to a shared helper?
        p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or None
        if p_path and not os.path.exists(p_path):
            run_svn(["mkdir", p_path])
        if not in_svn(path_offset, prefix=prefix+" "):
            if is_dir:
                # Export the final version of all files in this folder.
                add_path(export_paths, path_offset)
            else:
                # Export the final version of this file. We *need* to do this before running
                # the "svn add", even if we end-up re-exporting this file again via export_paths.
                run_svn(["export", "--force", "-r", source_rev,
                         source_repos_url+join_path(source_base, path_offset)+"@"+str(source_rev), path_offset])
            # If not already under version-control, then "svn add" this file/folder.
            run_svn(["add", "--parents", path_offset])
        if options.keep_prop:
            sync_svn_props(source_url, source_rev, path_offset)
    if is_dir:
        # For any folders that we process, process any child contents, so that we correctly
        # replay copies/replaces/etc.
        do_svn_add_dir(source_url, path_offset, source_rev, source_ancestors,
                       copyfrom_path, copyfrom_rev, export_paths, skip_paths, prefix+" ")
747
def do_svn_add_dir(source_url, path_offset, source_rev, source_ancestors,
                   parent_copyfrom_path, parent_copyfrom_rev,
                   export_paths, skip_paths, prefix=""):
    """
    Reconcile the contents of the directory 'path_offset' between the local
    working-copy and the source repo at 'source_rev'.

    Every entry listed remotely (and not in 'skip_paths') is handed to
    do_svn_add(); every entry listed locally but not remotely is
    "svn update"'d and then "svn remove"'d.
    """
    source_base = source_url[len(source_repos_url):]  # e.g. '/trunk'
    # Directory listings for the local WC (target_url) and the remote repo (source_url).
    # TODO: paths_local won't include add'd paths because "svn ls" lists the contents of the
    #       associated remote repo folder. (Is this a problem?)
    paths_local = get_svn_dirlist(path_offset)
    paths_remote = get_svn_dirlist(join_path(source_url, path_offset), source_rev)
    ui.status(prefix + ">> do_svn_add_dir: paths_local: %s", str(paths_local), level=ui.DEBUG, color='GREEN')
    ui.status(prefix + ">> do_svn_add_dir: paths_remote: %s", str(paths_remote), level=ui.DEBUG, color='GREEN')

    # Pass 1: add/update whatever exists in remote.
    for entry in paths_remote:
        # A trailing slash marks a directory in the listing.
        entry_is_dir = (entry[-1] == "/")
        entry_name = entry.rstrip('/') if entry_is_dir else entry
        working_path = join_path(path_offset, entry_name).lstrip('/')
        if working_path in skip_paths:
            continue
        do_svn_add(source_url, working_path, source_rev, source_ancestors,
                   parent_copyfrom_path, parent_copyfrom_rev,
                   export_paths, entry_is_dir, skip_paths, prefix+" ")

    # Pass 2: remove whatever exists in local but not in remote.
    for entry in paths_local:
        if entry in paths_remote:
            continue
        entry_is_dir = (entry[-1] == "/")
        entry_name = entry.rstrip('/') if entry_is_dir else entry
        working_path = join_path(path_offset, entry_name).lstrip('/')
        ui.status(" %s %s", 'D', join_path(source_base, working_path), level=ui.VERBOSE)
        run_svn(["update", working_path])
        run_svn(["remove", "--force", working_path])
        # TODO: Does this handle deleted folders too? Wouldn't want to have a case
        #       where we only delete all files from folder but leave orphaned folder around.
778
def process_svn_log_entry(log_entry, ancestors, commit_paths, prefix = ""):
    """
    Process SVN changes from the given log entry. Build an array (commit_paths)
    of the paths in the working-copy that were changed, i.e. the paths which
    we'll pass to "svn commit".

    'log_entry' is the source log entry; this function reads its 'revision',
      'url' and 'changed_paths' keys. Each changed-path dict is read for
      'path', 'kind', 'action', 'copyfrom_path' and 'copyfrom_revision', and
      its 'kind' may be filled in (mutated) when it is empty or 'none'.
    'ancestors' is passed through to do_svn_add() as its source_ancestors.
    'commit_paths' is a list which is appended to in place with the
      path_offset of every processed changed-path.
    'prefix' is prepended to the debug status messages.

    Raises UnsupportedSVNAction for an action outside _valid_svn_actions, and
    InternalError if an action falls through all handlers.
    """
    # Directories whose (recursive) "svn export" is deferred until all
    # changed-paths have been processed.
    export_paths = []
    source_rev = log_entry['revision']
    source_url = log_entry['url']
    source_base = source_url[len(source_repos_url):]  # e.g. '/trunk'
    ui.status(prefix + ">> process_svn_log_entry: %s", source_url+"@"+str(source_rev), level=ui.DEBUG, color='GREEN')
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not is_child_path(path, source_base):
            # Ignore changed files that are not part of this subdir
            ui.status(prefix + ">> process_svn_log_entry: Unrelated path: %s (base: %s)", path, source_base, level=ui.DEBUG, color='GREEN')
            continue
        if d['kind'] == "" or d['kind'] == 'none':
            # The "kind" value was introduced in SVN 1.6, and "svn log --xml" won't return a "kind"
            # value for commits made on a pre-1.6 repo, even if the server is now running 1.6.
            # We need to use other methods to fetch the node-kind for these cases.
            d['kind'] = svnclient.get_kind(source_repos_url, path, source_rev, d['action'], log_entry['changed_paths'])
        assert (d['kind'] == 'file') or (d['kind'] == 'dir')
        path_is_dir = True if d['kind'] == 'dir' else False
        path_is_file = True if d['kind'] == 'file' else False
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path
        action = d['action']
        # NOTE(review): _valid_svn_actions is a string, so this is a substring
        # test rather than a set-membership test.
        if action not in _valid_svn_actions:
            raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                % (source_rev, action))
        ui.status(" %s %s%s", action, d['path'],
            (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
            level=ui.VERBOSE)

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        commit_paths.append(path_offset)

        # Special-handling for replace's
        if action == 'R':
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            if path_offset and in_svn(path_offset):
                # Target path might not be under version-control yet, e.g. parent "add"
                # was a copy-from a branch which had no ancestry back to trunk, and each
                # child folder under that parent folder is a "replace" action on the final
                # merge to trunk. Since the child folders will be in skip_paths, do_svn_add
                # wouldn't have created them while processing the parent "add" path.
                if path_is_dir:
                    # Need to "svn update" before "svn remove" in case child contents are at
                    # a higher rev than the (parent) path_offset.
                    run_svn(["update", path_offset])
                run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            # Determine where to export from.
            # NOTE(review): svn_copy is assigned but not read within this function.
            svn_copy = False
            # Handle cases where this "add" was a copy from another URL in the source repo
            if d['copyfrom_revision']:
                # NOTE(review): copyfrom_path/copyfrom_rev are assigned but not
                # read within this function.
                copyfrom_path = d['copyfrom_path']
                copyfrom_rev = d['copyfrom_revision']
                skip_paths = []
                for tmp_d in log_entry['changed_paths']:
                    tmp_path = tmp_d['path']
                    if is_child_path(tmp_path, path) and tmp_d['action'] in 'ARD':
                        # Build list of child entries which are also in the changed_paths list,
                        # so that do_svn_add() can skip processing these entries when recursing
                        # since we'll end-up processing them later. Don't include action="M" paths
                        # in this list because it's non-conclusive: it could just mean that the
                        # file was modified *after* the copy-from, so we still want do_svn_add()
                        # to re-create the correct ancestry.
                        tmp_path_offset = tmp_path[len(source_base):].strip("/")
                        skip_paths.append(tmp_path_offset)
                do_svn_add(source_url, path_offset, source_rev, ancestors, "", "", export_paths, path_is_dir, skip_paths, prefix+" ")
            else:
                # Else just "svn export" the files from the source repo and "svn add" them.
                # Create (parent) directory if needed
                p_path = path_offset if path_is_dir else os.path.dirname(path_offset).strip() or None
                if p_path and not os.path.exists(p_path):
                    run_svn(["mkdir", p_path])
                # Export the entire added tree.
                if path_is_dir:
                    # For directories, defer the (recursive) "svn export". Might have a
                    # situation in a branch merge where the entry in the svn-log is a
                    # non-copy-from'd "add" but there are child contents (that we haven't
                    # gotten to yet in log_entry) that are copy-from's. When we try do
                    # the "svn copy" later on in do_svn_add() for those copy-from'd paths,
                    # having pre-existing (svn-add'd) contents creates some trouble.
                    # Instead, just create the stub folders ("svn mkdir" above) and defer
                    # exporting the final file-state until the end.
                    add_path(export_paths, path_offset)
                else:
                    # Export the final version of this file. We *need* to do this before running
                    # the "svn add", even if we end-up re-exporting this file again via export_paths.
                    run_svn(["export", "--force", "-r", source_rev,
                             join_path(source_url, path_offset)+"@"+str(source_rev), path_offset])
                if not in_svn(path_offset, prefix=prefix+" "):
                    # Need to use in_svn here to handle cases where client committed the parent
                    # folder and each indiv sub-folder.
                    run_svn(["add", "--parents", path_offset])
                if options.keep_prop:
                    sync_svn_props(source_url, source_rev, path_offset)

        elif action == 'D':
            if path_is_dir:
                # For dirs, need to "svn update" before "svn remove" because the final
                # "svn commit" will fail if the parent (path_offset) is at a lower rev
                # than any of the child contents. This needs to be a recursive update.
                run_svn(["update", path_offset])
            run_svn(["remove", "--force", path_offset])

        elif action == 'M':
            if path_is_file:
                run_svn(["export", "--force", "-N" , "-r", source_rev,
                         join_path(source_url, path_offset)+"@"+str(source_rev), path_offset])
            if path_is_dir:
                # For dirs, need to "svn update" before export/prop-sync because the
                # final "svn commit" will fail if the parent is at a lower rev than
                # child contents. Just need to update the rev-state of the dir (d['path']),
                # don't need to recursively update all child contents.
                # (??? is this the right reason?)
                run_svn(["update", "-N", path_offset])
            if options.keep_prop:
                sync_svn_props(source_url, source_rev, path_offset)

        else:
            raise InternalError("Internal Error: process_svn_log_entry: Unhandled 'action' value: '%s'"
                % action)

    # Export the final version of all add'd paths from source_url
    if export_paths:
        for path_offset in export_paths:
            run_svn(["export", "--force", "-r", source_rev,
                     join_path(source_url, path_offset)+"@"+str(source_rev), path_offset])
925
def keep_revnum(source_rev, target_rev_last, wc_target_tmp):
    """
    Add "padding" target revisions as needed to keep source and target
    revision #'s identical.

    'source_rev' is the source revision about to be replayed.
    'target_rev_last' is the last revision # currently in the target repo.
    'wc_target_tmp' is the path of a scratch working-copy; it is (re)created
      as an empty-depth checkout of target_repos_url and removed afterwards.

    One padding commit (a change to the 'svn2svn:keep-revnum' property) is
    made for every revision # in target_rev_last+1 .. source_rev-1.

    Returns the new last target revision #: the last padding revision
    committed, or 'target_rev_last' unchanged if no padding was needed.
    Raises InternalError if source_rev <= target_rev_last, and
    KeyboardInterrupt if a Ctrl-C was trapped during a padding commit.
    """
    bh = BreakHandler()
    if int(source_rev) <= int(target_rev_last):
        raise InternalError("keep-revnum mode is enabled, "
            "but source revision (r%s) is less-than-or-equal last target revision (r%s)" % \
            (source_rev, target_rev_last))
    if int(target_rev_last) < int(source_rev)-1:
        # Add "padding" target revisions to keep source and target rev #'s identical
        if os.path.exists(wc_target_tmp):
            shell.rmtree(wc_target_tmp)
        run_svn(["checkout", "-r", "HEAD", "--depth=empty", target_repos_url, wc_target_tmp])
        for rev_num in range(int(target_rev_last)+1, int(source_rev)):
            run_svn(["propset", "svn2svn:keep-revnum", rev_num, wc_target_tmp])
            # Prevent Ctrl-C's during this inner part, so we'll always display
            # the "Commit revision ..." message if we ran a "svn commit".
            bh.enable()
            try:
                output = run_svn(["commit", "-m", "", wc_target_tmp])
                rev_num_tmp = parse_svn_commit_rev(output) if output else None
                assert rev_num == rev_num_tmp
                ui.status("Committed revision %s (keep-revnum).", rev_num)
            finally:
                # Always restore normal Ctrl-C handling, even if the commit
                # (or the revision check) failed.
                bh.disable()
            # Check if the user tried to press Ctrl-C
            if bh.trapped:
                raise KeyboardInterrupt
            target_rev_last = rev_num
        shell.rmtree(wc_target_tmp)
    return target_rev_last
957
def disp_svn_log_summary(log_entry):
    """
    Display (at VERBOSE level) a summary of the given log entry: a separator
    line, a "r<revision> | <author> | <date>" header and the commit message.
    """
    verbose = ui.VERBOSE
    ui.status("------------------------------------------------------------------------", level=verbose)
    revision = log_entry['revision']
    author = log_entry['author']
    # log_entry['date'] is converted with int() and treated as a timestamp.
    committed_at = datetime.fromtimestamp(int(log_entry['date']))
    ui.status("r%s | %s | %s", revision, author, str(committed_at.isoformat(' ')), level=verbose)
    ui.status(log_entry['message'], level=verbose)
965
def real_main(args):
    """
    Replay history from the source URL into the target URL.

    'args' is a list whose first two items are popped: the source URL and the
    target URL. The parsed command-line switches are read from the
    module-level 'options'.

    Steps:
    1. Validate both URLs and initialise the module-level source_*/target_*
       variables.
    2. Resolve the start/end source revisions.
    3. Check-out (or clean-up, in continue-mode) the '_wc_target' working-copy
       and chdir into it.
    4. Either do the initial import of the first matching source revision, or
       (continue-mode) rebuild rev_map from the already-replayed history.
    5. Replay each remaining source log entry and commit it to the target.

    Calls sys.exit(1) on invalid revisions, a non-empty target (without
    --force), or continue-mode without any replayed history.
    """
    global source_url, target_url, rev_map
    source_url = urllib.quote(args.pop(0).rstrip("/"),"/:")  # e.g. 'http://server/svn/source/trunk'
    target_url = urllib.quote(args.pop(0).rstrip("/"),"/:")  # e.g. 'file:///svn/target/trunk'
    ui.status("options: %s", str(options), level=ui.DEBUG, color='GREEN')

    # Make sure that both the source and target URL's are valid
    source_info = svnclient.get_svn_info(source_url)
    assert is_child_path(source_url, source_info['repos_url'])
    target_info = svnclient.get_svn_info(target_url)
    assert is_child_path(target_url, target_info['repos_url'])

    # Init global vars
    global source_repos_url,source_base,source_repos_uuid
    source_repos_url = source_info['repos_url']       # e.g. 'http://server/svn/source'
    source_base = source_url[len(source_repos_url):]  # e.g. '/trunk'
    source_repos_uuid = source_info['repos_uuid']
    global target_repos_url,target_base
    target_repos_url = target_info['repos_url']       # e.g. 'http://server/svn/target'
    target_base = target_url[len(target_repos_url):]  # e.g. '/trunk'

    # Init start and end revision
    try:
        source_start_rev = svnclient.get_svn_rev(source_repos_url, options.rev_start if options.rev_start else 1)
    except ExternalCommandFailed:
        print("Error: Invalid start source revision value: %s" % (options.rev_start))
        sys.exit(1)
    try:
        source_end_rev = svnclient.get_svn_rev(source_repos_url, options.rev_end if options.rev_end else "HEAD")
    except ExternalCommandFailed:
        print("Error: Invalid end source revision value: %s" % (options.rev_end))
        sys.exit(1)
    ui.status("Using source revision range %s:%s", source_start_rev, source_end_rev, level=ui.VERBOSE)

    # TODO: If options.keep_date, should we try doing a "svn propset" on an *existing* revision
    #       as a sanity check, so we check if the pre-revprop-change hook script is correctly setup
    #       before doing first replay-commit?

    target_rev_last = target_info['revision']  # Last revision # in the target repo
    wc_target = os.path.abspath('_wc_target')
    wc_target_tmp = os.path.abspath('_wc_target_tmp')
    num_entries_proc = 0
    commit_count = 0
    source_rev = None
    target_rev = None

    # Check out a working copy of target_url if needed
    wc_exists = os.path.exists(wc_target)
    if wc_exists and not options.cont_from_break:
        shell.rmtree(wc_target)
        wc_exists = False
    if not wc_exists:
        ui.status("Checking-out _wc_target...", level=ui.VERBOSE)
        svnclient.svn_checkout(target_url, wc_target)
    os.chdir(wc_target)
    if wc_exists:
        # If using an existing WC, make sure it's clean ("svn revert")
        ui.status("Cleaning-up _wc_target...", level=ui.VERBOSE)
        run_svn(["cleanup"])
        full_svn_revert()

    if not options.cont_from_break:
        # Warn user if trying to start (non-continue) into a non-empty target path
        if not options.force_nocont:
            top_paths = run_svn(["list", "-r", "HEAD", target_url])
            if len(top_paths)>0:
                print("Error: Trying to replay (non-continue-mode) into a non-empty target_url location. "
                      "Use --force if you're sure this is what you want.")
                sys.exit(1)
        # Get the first log entry at/after source_start_rev, which is where
        # we'll do the initial import from.
        source_ancestors = find_svn_ancestors(source_repos_url, source_base, source_end_rev, prefix=" ")
        it_log_start = svnclient.iter_svn_log_entries(source_url, source_start_rev, source_end_rev, get_changed_paths=False, ancestors=source_ancestors)
        source_start_log = None
        for log_entry in it_log_start:
            # Pick the first entry. Need to use a "for ..." loop since we're using an iterator.
            source_start_log = log_entry
            break
        if not source_start_log:
            raise InternalError("Unable to find any matching revisions between %s:%s in source_url: %s" % \
                (source_start_rev, source_end_rev, source_url))

        # This is the revision we will start from for source_url
        source_start_rev = int(source_start_log['revision'])
        ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE)
        ui.status("", level=ui.VERBOSE)
        # Pad up to the *start* revision. (source_rev is still None at this
        # point, so it must not be used here.)
        if options.keep_revnum and source_start_rev > target_rev_last:
            target_rev_last = keep_revnum(source_start_rev, target_rev_last, wc_target_tmp)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_repos_url, source_start_rev, source_start_rev))
        # Export and add file-contents from source_url@source_start_rev
        source_start_url = source_url if not source_ancestors else source_repos_url+source_ancestors[len(source_ancestors)-1]['copyfrom_path']
        top_paths = run_svn(["list", "-r", source_start_rev, source_start_url+"@"+str(source_start_rev)])
        top_paths = top_paths.strip("\n").split("\n")
        for path in top_paths:
            # For each top-level file/folder...
            if not path:
                continue
            # Directories have a trailing slash in the "svn list" output
            path_is_dir = True if path[-1] == "/" else False
            path_offset = path.rstrip('/') if path_is_dir else path
            if in_svn(path_offset, prefix=" "):
                raise InternalError("Cannot replay history on top of pre-existing structure: %s" % join_path(source_start_url, path_offset))
            if path_is_dir and not os.path.exists(path_offset):
                os.makedirs(path_offset)
            run_svn(["export", "--force", "-r" , source_start_rev, join_path(source_start_url, path_offset)+"@"+str(source_start_rev), path_offset])
            run_svn(["add", path_offset])
        # Update any properties on the newly added content
        paths = run_svn(["list", "--recursive", "-r", source_start_rev, source_start_url+"@"+str(source_start_rev)])
        paths = paths.strip("\n").split("\n")
        if options.keep_prop:
            sync_svn_props(source_start_url, source_start_rev, "")
        for path in paths:
            if not path:
                continue
            # Directories have a trailing slash in the "svn list" output
            path_is_dir = True if path[-1] == "/" else False
            path_offset = path.rstrip('/') if path_is_dir else path
            ui.status(" A %s", join_path(source_base, path_offset), level=ui.VERBOSE)
            if options.keep_prop:
                sync_svn_props(source_start_url, source_start_rev, path_offset)
        # Commit the initial import
        num_entries_proc += 1
        target_revprops = gen_tracking_revprops(source_start_rev)  # Build source-tracking revprop's
        target_rev = commit_from_svn_log_entry(source_start_log, target_revprops=target_revprops)
        if target_rev:
            # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
            set_rev_map(source_start_rev, target_rev)
            commit_count += 1
            target_rev_last = target_rev
            if options.verify:
                # Verify against the revision which was just imported
                # (source_rev has not been assigned yet).
                verify_commit(source_start_rev, target_rev_last)
    else:
        # Re-build the rev_map based on any already-replayed history in target_url
        build_rev_map(target_url, target_rev_last, source_info)
        if not rev_map:
            print("Error: Called with continue-mode, but no already-replayed source history found in target_url.")
            sys.exit(1)
        # The source rev # which maps to the highest target rev #.
        source_start_rev = int(max(rev_map, key=rev_map.get))
        assert source_start_rev
        ui.status("Continuing from source revision %s.", source_start_rev, level=ui.VERBOSE)
        ui.status("", level=ui.VERBOSE)

    # Compare the client version as a (major, minor) tuple of ints. A float
    # built from "major.minor" mis-orders versions, e.g. "1.10" -> 1.1 < 1.7.
    svn_vers_t = svnclient.get_svn_client_version()
    svn_vers = tuple(int(part) for part in svn_vers_t[0:2])

    # Load SVN log starting from source_start_rev + 1
    source_ancestors = find_svn_ancestors(source_repos_url, source_base, source_end_rev, prefix=" ")
    it_log_entries = svnclient.iter_svn_log_entries(source_url, source_start_rev+1, source_end_rev, get_revprops=True, ancestors=source_ancestors) if source_start_rev < source_end_rev else []
    source_rev_last = source_start_rev

    try:
        for log_entry in it_log_entries:
            if options.entries_proc_limit:
                if num_entries_proc >= options.entries_proc_limit:
                    break
            # Replay this revision from source_url into target_url
            source_rev = log_entry['revision']
            if options.keep_revnum:
                if source_rev < target_rev_last:
                    print("Error: Last target revision (r%s) is equal-or-higher than starting source revision (r%s). "
                          "Cannot use --keep-revnum mode." % (target_rev_last, source_start_rev))
                    sys.exit(1)
                target_rev_last = keep_revnum(source_rev, target_rev_last, wc_target_tmp)
            disp_svn_log_summary(log_entry)
            # Process all the changed-paths in this log entry
            commit_paths = []
            process_svn_log_entry(log_entry, source_ancestors, commit_paths)
            num_entries_proc += 1
            # Commit any changes made to _wc_target
            target_revprops = gen_tracking_revprops(source_rev)  # Build source-tracking revprop's
            target_rev = commit_from_svn_log_entry(log_entry, commit_paths, target_revprops=target_revprops)
            source_rev_last = source_rev
            if target_rev:
                # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
                set_rev_map(source_rev, target_rev)
                target_rev_last = target_rev
                commit_count += 1
                if options.verify:
                    verify_commit(source_rev, target_rev_last, log_entry)
                # Run "svn cleanup" every 100 commits if SVN 1.7+, to clean-up orphaned ".svn/pristines/*"
                if svn_vers >= (1, 7) and (commit_count % 100 == 0):
                    run_svn(["cleanup"])
        if source_rev_last == source_start_rev:
            # If there were no new source_url revisions to process, still trigger
            # "full-mode" verify check (if enabled).
            if options.verify:
                verify_commit(source_rev_last, target_rev_last)

    except KeyboardInterrupt:
        print("\nStopped by user.")
        print("\nCleaning-up...")
        run_svn(["cleanup"])
        full_svn_revert()
    except:
        # Top-level boundary: report the failure, then leave the working-copy clean.
        # NOTE(review): this bare except also intercepts the sys.exit(1) above.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        print("\nCleaning-up...")
        run_svn(["cleanup"])
        print(run_svn(["status"]))
        full_svn_revert()
    finally:
        print("\nFinished at source revision %s%s." % (source_rev_last, " (dry-run)" if options.dry_run else ""))
1174
1175 def main():
1176 # Defined as entry point. Must be callable without arguments.
1177 usage = "svn2svn, version %s\n" % str(full_version) + \
1178 "<http://nynim.org/projects/svn2svn> <https://github.com/tonyduckles/svn2svn>\n\n" + \
1179 "Usage: %prog [OPTIONS] source_url target_url\n"
1180 description = """\
1181 Replicate (replay) history from one SVN repository to another. Maintain
1182 logical ancestry wherever possible, so that 'svn log' on the replayed repo
1183 will correctly follow file/folder renames.
1184
1185 Examples:
1186 Create a copy of only /trunk from source repo, starting at r5000
1187 $ svnadmin create /svn/target
1188 $ svn mkdir -m 'Add trunk' file:///svn/target/trunk
1189 $ svn2svn -av -r 5000 http://server/source/trunk file:///svn/target/trunk
1190 1. The target_url will be checked-out to ./_wc_target
1191 2. The first commit to http://server/source/trunk at/after r5000 will be
1192 exported & added into _wc_target
1193 3. All revisions affecting http://server/source/trunk (starting at r5000)
1194 will be replayed to _wc_target. Any add/copy/move/replaces that are
1195 copy-from'd some path outside of /trunk (e.g. files renamed on a
1196 /branch and branch was merged into /trunk) will correctly maintain
1197 logical ancestry where possible.
1198
1199 Use continue-mode (-c) to pick-up where the last run left-off
1200 $ svn2svn -avc http://server/source/trunk file:///svn/target/trunk
1201 1. The target_url will be checked-out to ./_wc_target, if not already
1202 checked-out
1203 2. All new revisions affecting http://server/source/trunk starting from
1204 the last replayed revision to file:///svn/target/trunk (based on the
1205 svn2svn:* revprops) will be replayed to _wc_target, maintaining all
1206 logical ancestry where possible."""
1207 parser = optparse.OptionParser(usage, description=description,
1208 formatter=HelpFormatter(), version="%prog "+str(full_version))
1209 parser.add_option("-v", "--verbose", dest="verbosity", action="count", default=1,
1210 help="enable additional output (use -vv or -vvv for more)")
1211 parser.add_option("-a", "--archive", action="store_true", dest="archive", default=False,
1212 help="archive/mirror mode; same as -UDP (see REQUIRE's below)\n"
1213 "maintain same commit author, same commit time, and file/dir properties")
1214 parser.add_option("-U", "--keep-author", action="store_true", dest="keep_author", default=False,
1215 help="maintain same commit authors (svn:author) as source\n"
1216 "(REQUIRES 'pre-revprop-change' hook script to allow 'svn:author' changes)")
1217 parser.add_option("-D", "--keep-date", action="store_true", dest="keep_date", default=False,
1218 help="maintain same commit time (svn:date) as source\n"
1219 "(REQUIRES 'pre-revprop-change' hook script to allow 'svn:date' changes)")
1220 parser.add_option("-P", "--keep-prop", action="store_true", dest="keep_prop", default=False,
1221 help="maintain same file/dir SVN properties as source")
1222 parser.add_option("-R", "--keep-revnum", action="store_true", dest="keep_revnum", default=False,
1223 help="maintain same rev #'s as source. creates placeholder target "
1224 "revisions (by modifying a 'svn2svn:keep-revnum' property at the root of the target repo)")
1225 parser.add_option("-c", "--continue", action="store_true", dest="cont_from_break",
1226 help="continue from last source commit to target (based on svn2svn:* revprops)")
1227 parser.add_option("-f", "--force", action="store_true", dest="force_nocont",
1228 help="allow replaying into a non-empty target folder")
1229 parser.add_option("-r", "--revision", type="string", dest="revision", metavar="ARG",
1230 help="revision range to replay from source_url\n"
1231 "A revision argument can be one of:\n"
1232 " START start rev # (end will be 'HEAD')\n"
1233 " START:END start and ending rev #'s\n"
1234 "Any revision # formats which SVN understands are "
1235 "supported, e.g. 'HEAD', '{2010-01-31}', etc.")
1236 parser.add_option("-u", "--log-author", action="store_true", dest="log_author", default=False,
1237 help="append source commit author to replayed commit mesages")
1238 parser.add_option("-d", "--log-date", action="store_true", dest="log_date", default=False,
1239 help="append source commit time to replayed commit messages")
1240 parser.add_option("-l", "--limit", type="int", dest="entries_proc_limit", metavar="NUM",
1241 help="maximum number of source revisions to process")
1242 parser.add_option("-n", "--dry-run", action="store_true", dest="dry_run", default=False,
1243 help="process next source revision but don't commit changes to "
1244 "target working-copy (forces --limit=1)")
1245 parser.add_option("-x", "--verify", action="store_const", const=1, dest="verify",
1246 help="verify ancestry and content for changed paths in commit after every target commit or last target commit")
1247 parser.add_option("-X", "--verify-all", action="store_const", const=2, dest="verify",
1248 help="verify ancestry and content for entire target_url tree after every target commit or last target commit")
1249 parser.add_option("--debug", dest="verbosity", const=ui.DEBUG, action="store_const",
1250 help="enable debugging output (same as -vvv)")
1251 global options
1252 options, args = parser.parse_args()
1253 if len(args) != 2:
1254 parser.error("incorrect number of arguments")
1255 if options.verbosity < 10:
1256 # Expand multiple "-v" arguments to a real ui._level value
1257 options.verbosity *= 10
1258 if options.dry_run:
1259 # When in dry-run mode, only try to process the next log_entry
1260 options.entries_proc_limit = 1
1261 options.rev_start = None
1262 options.rev_end = None
1263 if options.revision:
1264 # Reg-ex for matching a revision arg (http://svnbook.red-bean.com/en/1.5/svn.tour.revs.specifiers.html#svn.tour.revs.dates)
1265 rev_patt = '[0-9A-Z]+|\{[0-9A-Za-z/\\ :-]+\}'
1266 rev = None
1267 match = re.match('^('+rev_patt+'):('+rev_patt+')$', options.revision) # First try start:end match
1268 if match is None: match = re.match('^('+rev_patt+')$', options.revision) # Next, try start match
1269 if match is None:
1270 parser.error("unexpected --revision argument format; see 'svn help log' for valid revision formats")
1271 rev = match.groups()
1272 options.rev_start = rev[0] if len(rev)>0 else None
1273 options.rev_end = rev[1] if len(rev)>1 else None
1274 if options.archive:
1275 options.keep_author = True
1276 options.keep_date = True
1277 options.keep_prop = True
1278 ui.update_config(options)
1279 return real_main(args)
1280
1281
# Script entry point: when this module is executed directly, run main() and
# pass its return value to sys.exit(), using 0 when that value is falsy
# (e.g. None).
if __name__ == "__main__":
    exit_status = main() or 0
    sys.exit(exit_status)