]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/run/svn2svn.py
Prevent KeyboardInterrupt's during SVN commit
[svn2svn.git] / svn2svn / run / svn2svn.py
1 """
2 Replicate (replay) changesets from one SVN repository to another.
3 """
4
5 from .. import base_version, full_version
6 from .. import ui
7 from .. import svnclient
8 from ..shell import run_svn,run_shell_command
9 from ..errors import (ExternalCommandFailed, UnsupportedSVNAction, InternalError, VerificationError)
10 from parse import HelpFormatter
11 from breakhandler import BreakHandler
12
13 import sys
14 import os
15 import time
16 import traceback
17 import shutil
18 import operator
19 import optparse
20 import re
21 from datetime import datetime
22
# The action codes (one character each) that the replay logic knows how to
# handle, as reported in the changed-paths of "svn log".
_valid_svn_actions = "MARD" # The list of known SVN action abbr's, from "svn log"

# Module-level variables/parameters
# These are read as globals by the functions in this module. rev_map is reset
# by build_rev_map() and populated through set_rev_map().
source_url = "" # URL to source path in source SVN repo, e.g. 'http://server/svn/source/trunk'
source_repos_url = "" # URL to root of source SVN repo, e.g. 'http://server/svn/source'
source_base = "" # Relative path of source_url in source SVN repo, e.g. '/trunk'
source_repos_uuid = "" # UUID of source SVN repo
target_url ="" # URL to target path in target SVN repo, e.g. 'file:///svn/repo_target/trunk'
target_repos_url = "" # URL to root of target SVN repo, e.g. 'http://server/svn/target'
target_base = "" # Relative path of target_url in target SVN repo, e.g. '/trunk'
rev_map = {} # The running mapping-table dictionary for source_url rev #'s -> target_url rev #'s
options = None # optparser options
35
def parse_svn_commit_rev(output):
    """
    Extract the new revision number from the text printed by "svn commit".

    Scans the output line by line for the first line that begins with
    'Committed revision ' and returns the number following it as an int.
    Asserts that such a line was found.
    """
    marker = 'Committed revision '
    found = None
    for text in output.strip("\n").split("\n"):
        if text.startswith(marker):
            # Drop the trailing period that follows the revision number
            found = text[len(marker):].rstrip('.')
            break
    assert found is not None
    return int(found)
48
def commit_from_svn_log_entry(log_entry, commit_paths=None, target_revprops=None):
    """
    Given an SVN log entry and an optional list of changed paths, do an svn commit.

    'log_entry' is the source "svn log" entry being replayed; the keys read here
    are 'date', 'date_raw', 'message', 'author' and 'revprops'.
    'commit_paths' is an optional collection of paths to pass to "svn commit".
    It is only passed along when it holds fewer than 100 entries; otherwise
    the commit runs with no explicit paths.
    'target_revprops' is an optional list of {'name':..., 'value':...} dicts to
    set on the new commit; these override same-named revprops from log_entry.

    Returns the committed revision number (int), or None when running in
    dry-run mode or when "svn commit" produced no output.

    Raises KeyboardInterrupt after the commit has finished if a Ctrl-C was
    trapped while it was running.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # Display the _wc_target "svn status" info if running in -vv (or higher) mode
    if ui.get_level() >= ui.EXTRA:
        ui.status(">> commit_from_svn_log_entry: Pre-commit _wc_target status:", level=ui.EXTRA, color='CYAN')
        ui.status(run_svn(["status"]), level=ui.EXTRA, color='CYAN')
    # This will use the local timezone for displaying commit times
    timestamp = int(log_entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    # Uncomment this one if you prefer UTC commit times
    #svn_date = "%d 0" % timestamp
    args = ["commit", "--force-log"]
    message = log_entry['message']
    if options.log_date:
        message += "\nDate: " + svn_date
    if options.log_author:
        message += "\nAuthor: " + log_entry['author']
    if options.keep_author:
        args += ["--username", log_entry['author']]
    args += ["-m", message]
    revprops = {}
    if log_entry['revprops']:
        # Carry forward any revprop's from the source revision
        for v in log_entry['revprops']:
            revprops[v['name']] = v['value']
    if target_revprops:
        # Add any extra revprop's we want to set for the target repo commits
        for v in target_revprops:
            revprops[v['name']] = v['value']
    for key in revprops:
        args += ["--with-revprop", "%s=%s" % (key, str(revprops[key]))]
    if commit_paths and len(commit_paths) < 100:
        # If we don't have an excessive amount of individual changed paths, pass
        # those to the "svn commit" command. Else, pass nothing so we commit at
        # the root of the working-copy.
        args += list(commit_paths)
    rev_num = None
    if not options.dry_run:
        # Use BreakHandler class to temporarily redirect SIGINT handler, so that
        # "svn commit" + post-commit rev-prop updating is a quasi-atomic unit.
        # If user presses Ctrl-C during this, wait until after this full action
        # has finished raising the KeyboardInterrupt exception.
        bh = BreakHandler()
        bh.enable()
        try:
            # Run the "svn commit" command, and screen-scrape the target_rev value (if any)
            output = run_svn(args)
            rev_num = parse_svn_commit_rev(output) if output else None
            if rev_num is not None:
                ui.status("Committed revision %s.", rev_num)
                if options.keep_date:
                    run_svn(["propset", "--revprop", "-r", rev_num, "svn:date", log_entry['date_raw']])
        finally:
            # Always undo the SIGINT redirection, even if one of the svn
            # commands above raised, so Ctrl-C keeps working afterwards.
            bh.disable()
        # Check if the user tried to press Ctrl-C
        if bh.trapped:
            raise KeyboardInterrupt
    return rev_num
110
def verify_commit(source_rev, target_rev, log_entry=None):
    """
    Compare the ancestry/content/properties between source_url vs target_url
    for a given revision.

    'source_rev' and 'target_rev' are the source-repo and target-repo revision
    numbers to compare.
    'log_entry' is the source "svn log" entry for source_rev; it is only
    consulted when options.verify == 1 (changed-paths-only mode). Note that
    empty 'kind' fields in its changed_paths entries are filled in in place.

    options.verify selects what is checked:
      1 = only the paths listed in log_entry['changed_paths'] under source_base
      2 = every file listed under source_url at source_rev, plus a check that
          the target listing at target_rev has no extra or missing files.

    Raises VerificationError on the first mismatch found, and
    UnsupportedSVNAction if a changed-path carries an unknown action code.
    """
    # Gather the offsets in the source repo to check
    check_paths = []
    remove_paths = []
    # TODO: Need to make this ancestry aware
    if options.verify == 1 and log_entry is not None: # Changed only
        ui.status("Verifying source revision %s (only-changed)...", source_rev, level=ui.VERBOSE)
        for d in log_entry['changed_paths']:
            path = d['path']
            if not is_child_path(path, source_base):
                # Ignore changes outside of the source path being replayed
                continue
            if d['kind'] == "":
                d['kind'] = svnclient.get_kind(source_repos_url, path, source_rev, d['action'], log_entry['changed_paths'])
            assert (d['kind'] == 'file') or (d['kind'] == 'dir')
            path_is_dir = True if d['kind'] == 'dir' else False
            path_is_file = True if d['kind'] == 'file' else False
            path_offset = path[len(source_base):].strip("/")
            if d['action'] == 'D':
                # Deleted paths are verified separately (must be gone from the target WC)
                remove_paths.append(path_offset)
            elif not path_offset in check_paths:
                ui.status("verify_commit: path [mode=changed]: kind=%s: %s", d['kind'], path, level=ui.DEBUG, color='YELLOW')
                if path_is_file:
                    ui.status(" "+"verify_commit [mode=changed]: check_paths.append('%s')", path_offset, level=ui.DEBUG, color='GREEN')
                    check_paths.append(path_offset)
                if path_is_dir:
                    # Only added/replaced directories are expanded into their child files
                    if not d['action'] in 'AR':
                        continue
                    child_paths = run_svn(["list", "--recursive", "-r", source_rev, source_url.rstrip("/")+"/"+path_offset+"@"+str(source_rev)])
                    child_paths = child_paths.strip("\n").split("\n")
                    for child_path in child_paths:
                        if not child_path:
                            continue
                        # Directories have a trailing slash in the "svn list" output
                        child_path_is_dir = True if child_path[-1] == "/" else False
                        child_path_offset = child_path.rstrip('/') if child_path_is_dir else child_path
                        if not child_path_is_dir:
                            # Only check files
                            working_path = (path_offset+"/" if path_offset else "") + child_path_offset
                            if not working_path in check_paths:
                                ui.status(" "+"verify_commit [mode=changed]: check_paths.append('%s'+'/'+'%s')", path_offset, child_path_offset, level=ui.DEBUG, color='GREEN')
                                check_paths.append(working_path)
    if options.verify == 2: # All paths
        ui.status("Verifying source revision %s (all)...", source_rev, level=ui.VERBOSE)
        child_paths = run_svn(["list", "--recursive", "-r", source_rev, source_url+"@"+str(source_rev)])
        child_paths = child_paths.strip("\n").split("\n")
        for child_path in child_paths:
            if not child_path:
                continue
            # Directories have a trailing slash in the "svn list" output
            child_path_is_dir = True if child_path[-1] == "/" else False
            child_path_offset = child_path.rstrip('/') if child_path_is_dir else child_path
            if not child_path_is_dir:
                # Only check files
                ui.status("verify_commit [mode=all]: check_paths.append('%s')", child_path_offset, level=ui.DEBUG, color='GREEN')
                check_paths.append(child_path_offset)

    # If there were any paths deleted in the last revision (options.verify=1 mode),
    # check that they were correctly deleted.
    if remove_paths:
        count_total = len(remove_paths)
        count = 0
        for path_offset in remove_paths:
            count += 1
            if in_svn(path_offset):
                ui.status(" (%s/%s) Verify path: FAIL: %s", str(count).rjust(len(str(count_total))), count_total, path_offset, level=ui.VERBOSE, color='RED')
                raise VerificationError("Path removed in source rev r%s, but still exists in target WC: %s" % (source_rev, path_offset))
            ui.status(" (%s/%s) Verify remove: OK: %s", str(count).rjust(len(str(count_total))), count_total, path_offset, level=ui.VERBOSE)

    # Compare each of the check_path entries between source vs. target
    if check_paths:
        # min(..., key=rev_map.get) picks the source rev whose mapped target rev is lowest
        source_rev_first = int(min(rev_map, key=rev_map.get)) or 1 # The first source_rev we replayed into target
        ui.status("verify_commit: source_rev_first:%s", source_rev_first, level=ui.DEBUG, color='YELLOW')
        count_total = len(check_paths)
        count = 0
        for path_offset in check_paths:
            count += 1
            ui.status("verify_commit: path_offset:%s", path_offset, level=ui.DEBUG, color='YELLOW')
            source_log_entries = svnclient.run_svn_log(source_url.rstrip("/")+"/"+path_offset+"@"+str(source_rev), source_rev, 1, source_rev-source_rev_first+1)
            target_log_entries = svnclient.run_svn_log(target_url.rstrip("/")+"/"+path_offset+"@"+str(target_rev), target_rev, 1, target_rev)
            # Build a list of commits in source_log_entries which matches our
            # target path_offset.
            working_path = source_base+"/"+path_offset
            source_revs = []
            # NOTE: the loop variable below re-binds the 'log_entry' parameter.
            for log_entry in source_log_entries:
                source_rev_tmp = log_entry['revision']
                if source_rev_tmp < source_rev_first:
                    # Only process source revisions which have been replayed into target
                    break
                #ui.status(" [verify_commit] source_rev_tmp:%s, working_path:%s\n%s", source_rev_tmp, working_path, pp.pformat(log_entry), level=ui.DEBUG, color='MAGENTA')
                changed_paths_temp = []
                for d in log_entry['changed_paths']:
                    path = d['path']
                    # Match working_path or any parents
                    if is_child_path(working_path, path):
                        ui.status(" verify_commit: changed_path: %s %s@%s (parent:%s)", d['action'], path, source_rev_tmp, working_path, level=ui.DEBUG, color='YELLOW')
                        changed_paths_temp.append({'path': path, 'data': d})
                assert changed_paths_temp
                # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
                changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
                # Find the action for our working_path in this revision. Use a loop to check in reverse order,
                # so that if the target file/folder is "M" but has a parent folder with an "A" copy-from.
                working_path_next = working_path
                match_d = {}
                for v in changed_paths:
                    d = v['data']
                    if not match_d:
                        # Default to the deepest matching entry unless a copy-from is found below
                        match_d = d
                    path = d['path']
                    if d['action'] not in _valid_svn_actions:
                        raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                            % (log_entry['revision'], d['action']))
                    if d['action'] in 'AR' and d['copyfrom_revision']:
                        # If we found a copy-from action for a parent path, adjust our
                        # working_path to follow the rename/copy-from, just like find_svn_ancestors().
                        working_path_next = working_path.replace(d['path'], d['copyfrom_path'])
                        match_d = d
                        break
                if is_child_path(working_path, source_base):
                    # Only add source_rev's where the path changed in this revision was a child
                    # of source_base, so that we silently ignore any history that happened on
                    # non-source_base paths (e.g. ignore branch history if we're only replaying trunk).
                    is_diff = False
                    d = match_d
                    if d['action'] == 'M':
                        # For action="M", we need to throw out cases where the only change was to
                        # a property which we ignore, e.g. "svn:mergeinfo".
                        if d['kind'] == "":
                            d['kind'] = svnclient.get_kind(source_repos_url, working_path, log_entry['revision'], d['action'], log_entry['changed_paths'])
                        assert (d['kind'] == 'file') or (d['kind'] == 'dir')
                        if d['kind'] == 'file':
                            # Check for file-content changes
                            # TODO: This should be made ancestor-aware, since the file won't always be at the same path in rev-1
                            sum1 = run_shell_command("svn cat -r %s '%s' | md5sum" % (source_rev_tmp, source_repos_url+working_path+"@"+str(source_rev_tmp)))
                            sum2 = run_shell_command("svn cat -r %s '%s' | md5sum" % (source_rev_tmp-1, source_repos_url+working_path_next+"@"+str(source_rev_tmp-1)))
                            is_diff = True if sum1 <> sum2 else False
                        if not is_diff:
                            # Check for property changes
                            props1 = svnclient.get_all_props(source_repos_url+working_path, source_rev_tmp)
                            props2 = svnclient.get_all_props(source_repos_url+working_path_next, source_rev_tmp-1)
                            # Ignore changes to "svn:mergeinfo", since we don't copy that
                            if 'svn:mergeinfo' in props1: del props1['svn:mergeinfo']
                            if 'svn:mergeinfo' in props2: del props2['svn:mergeinfo']
                            for prop in props1:
                                if prop not in props2 or \
                                   props1[prop] != props2[prop]:
                                    is_diff = True
                                    break
                            for prop in props2:
                                if prop not in props1 or \
                                   props1[prop] != props2[prop]:
                                    is_diff = True
                                    break
                        if not is_diff:
                            ui.status(" verify_commit: skip %s@%s", working_path, source_rev_tmp, level=ui.DEBUG, color='GREEN_B', bold=True)
                    else:
                        # Any non-"M" action always counts as a real change
                        is_diff = True
                    if is_diff:
                        ui.status(" verify_commit: source_revs.append(%s), working_path:%s", source_rev_tmp, working_path, level=ui.DEBUG, color='GREEN_B')
                        source_revs.append({'path': working_path, 'revision': source_rev_tmp})
                # Continue with the (possibly copy-from adjusted) path for the next log entry
                working_path = working_path_next
            # Build a list of all the target commits "svn log" returned
            target_revs = []
            target_revs_rmndr = []
            for log_entry in target_log_entries:
                target_rev_tmp = log_entry['revision']
                ui.status(" verify_commit: target_revs.append(%s)", target_rev_tmp, level=ui.DEBUG, color='GREEN_B')
                target_revs.append(target_rev_tmp)
                target_revs_rmndr.append(target_rev_tmp)
            # Compare the two lists
            for d in source_revs:
                working_path = d['path']
                source_rev_tmp = d['revision']
                target_rev_tmp = get_rev_map(source_rev_tmp, " ")
                working_offset = working_path[len(source_base):].strip("/")
                # NOTE(review): sum1/sum2 computed here are only overwritten below, never compared - confirm whether this is leftover debugging
                sum1 = run_shell_command("svn cat -r %s '%s' | md5sum" % (source_rev_tmp, source_repos_url+working_path+"@"+str(source_rev_tmp)))
                sum2 = run_shell_command("svn cat -r %s '%s' | md5sum" % (target_rev_tmp, target_url+"/"+working_offset+"@"+str(target_rev_tmp))) if target_rev_tmp is not None else ""
                #print "source@%s: %s" % (str(source_rev_tmp).ljust(6), sum1)
                #print "target@%s: %s" % (str(target_rev_tmp).ljust(6), sum2)
                ui.status(" verify_commit: %s: source=%s target=%s", working_offset, source_rev_tmp, target_rev_tmp, level=ui.DEBUG, color='GREEN')
                if not target_rev_tmp:
                    ui.status(" (%s/%s) Verify path: FAIL: %s", str(count).rjust(len(str(count_total))), count_total, path_offset, level=ui.VERBOSE, color='RED')
                    raise VerificationError("Unable to find corresponding target_rev for source_rev r%s in rev_map (path_offset='%s')" % (source_rev_tmp, path_offset))
                if target_rev_tmp not in target_revs:
                    # If found a source_rev with no equivalent target_rev in target_revs,
                    # check if the only difference in source_rev vs. source_rev-1 is the
                    # removal/addition of a trailing newline char, since this seems to get
                    # stripped-out sometimes during the replay (via "svn export"?).
                    # Strip any trailing \r\n from file-content (http://stackoverflow.com/a/1656218/346778)
                    sum1 = run_shell_command("svn cat -r %s '%s' | perl -i -p0777we's/\\r\\n\z//' | md5sum" % (source_rev_tmp, source_repos_url+working_path+"@"+str(source_rev_tmp)))
                    sum2 = run_shell_command("svn cat -r %s '%s' | perl -i -p0777we's/\\r\\n\z//' | md5sum" % (source_rev_tmp-1, source_repos_url+working_path+"@"+str(source_rev_tmp-1)))
                    if sum1 <> sum2:
                        ui.status(" (%s/%s) Verify path: FAIL: %s", str(count).rjust(len(str(count_total))), count_total, path_offset, level=ui.VERBOSE, color='RED')
                        raise VerificationError("Found source_rev (r%s) with no corresponding target_rev: path_offset='%s'" % (source_rev_tmp, path_offset))
                # NOTE(review): list.remove() raises ValueError when target_rev_tmp is not in
                # the list, which looks reachable when the newline-only check above did not
                # raise, or when two source revs map to the same target rev - confirm.
                target_revs_rmndr.remove(target_rev_tmp)
            if target_revs_rmndr:
                # Anything left over is a target commit with no matching source revision
                rmndr_list = ", ".join(map(str, target_revs_rmndr))
                ui.status(" (%s/%s) Verify path: FAIL: %s", str(count).rjust(len(str(count_total))), count_total, path_offset, level=ui.VERBOSE, color='RED')
                raise VerificationError("Found one or more *extra* target_revs: path_offset='%s', target_revs='%s'" % (path_offset, rmndr_list))
            ui.status(" (%s/%s) Verify path: OK: %s", str(count).rjust(len(str(count_total))), count_total, path_offset, level=ui.VERBOSE)

    # Ensure there are no "extra" files in the target side
    if options.verify == 2:
        target_paths = []
        child_paths = run_svn(["list", "--recursive", "-r", target_rev, target_url+"@"+str(target_rev)])
        child_paths = child_paths.strip("\n").split("\n")
        for child_path in child_paths:
            if not child_path:
                continue
            # Directories have a trailing slash in the "svn list" output
            child_path_is_dir = True if child_path[-1] == "/" else False
            child_path_offset = child_path.rstrip('/') if child_path_is_dir else child_path
            if not child_path_is_dir:
                target_paths.append(child_path_offset)
        # Compare
        for path_offset in target_paths:
            if not path_offset in check_paths:
                raise VerificationError("Path exists in target (@%s) but not source (@%s): %s" % (target_rev, source_rev, path_offset))
        for path_offset in check_paths:
            if not path_offset in target_paths:
                raise VerificationError("Path exists in source (@%s) but not target (@%s): %s" % (source_rev, target_rev, path_offset))
335
def full_svn_revert():
    """
    Do an "svn revert" and proactively remove any extra files in the working copy.

    After the recursive revert, every path that "svn status" still reports
    with a leading "?" is deleted from disk: plain files via os.remove() and
    directories via shutil.rmtree().
    """
    run_svn(["revert", "--recursive", "."])
    output = run_svn(["status"])
    if not output:
        return
    for line in output.strip("\n").split("\n"):
        # Use startswith() rather than line[0], so that a blank line in the
        # status output is skipped instead of raising IndexError.
        if not line.startswith("?"):
            continue
        path = line[4:].strip(" ")
        if os.path.isfile(path):
            os.remove(path)
        elif os.path.isdir(path):
            shutil.rmtree(path)
351
def gen_tracking_revprops(source_rev):
    """
    Return the svn2svn source-tracking revprops for a replayed commit.

    The result is a list of {'name': ..., 'value': ...} dicts recording the
    source repo UUID, the source URL and the given source revision.
    """
    tracking = (
        ('svn2svn:source_uuid', source_repos_uuid),
        ('svn2svn:source_url', source_url),
        ('svn2svn:source_rev', source_rev),
    )
    return [{'name': prop_name, 'value': prop_value} for prop_name, prop_value in tracking]
360
def sync_svn_props(source_url, source_rev, path_offset):
    """
    Bring the properties of a target WC path in line with the source repo.

    Reads all properties of path_offset under source_url at source_rev and of
    path_offset in the working copy, then issues "svn propdel"/"svn propset"
    so the target matches the source. "svn:mergeinfo" is never copied over.
    """
    wanted = svnclient.get_all_props(join_path(source_url, path_offset), source_rev)
    current = svnclient.get_all_props(path_offset)
    # Never carry-forward "svn:mergeinfo"
    if 'svn:mergeinfo' in wanted:
        del wanted['svn:mergeinfo']
    # Drop properties the target has but the source does not
    for name in current:
        if name in wanted:
            continue
        run_svn(["propdel", name, path_offset])
    # Set properties that are missing from the target or whose value differs
    for name in wanted:
        if name in current and wanted[name] == current[name]:
            continue
        run_svn(["propset", name, wanted[name], path_offset])
381
def in_svn(p, require_in_repo=False, prefix=""):
    """
    Report whether the file/folder 'p' is tracked by Subversion.

    The answer comes from the first "svn status" entry for the path (queried
    non-recursively) rather than from looking for ".svn" directories, since
    WC-NG working copies (SVN 1.7+) only keep a single top-level ".svn".

    With require_in_repo=True, paths that are only locally added (or have no
    revision) are reported as not tracked. 'prefix' is prepended to the debug
    status line.
    """
    entries = svnclient.get_svn_status(p, no_recursive=True)
    if not entries:
        return False
    info = entries[0]
    status = info['status']
    if require_in_repo and (status == 'added' or info['revision'] is None):
        # Caller needs the path to exist in the repo, so a locally-added
        # path does not count.
        tracked = False
    elif status == 'deleted':
        # Paths scheduled for deletion are not considered under source-control
        tracked = False
    else:
        # Normal entries and locally added/copied paths count as tracked
        tracked = bool(info['type'] == 'normal' or status == 'added' or info['copied'] == 'true')
    ui.status(prefix + ">> in_svn('%s', require_in_repo=%s) --> %s", p, str(require_in_repo), str(tracked), level=ui.DEBUG, color='GREEN')
    return tracked
403
def is_child_path(path, p_path):
    """
    Return True when 'path' is the same as 'p_path' or lies beneath it,
    i.e. starts with p_path followed by a "/". Otherwise return False.
    """
    if path == p_path:
        return True
    return path.startswith(p_path + "/")
406
def join_path(base, child):
    """
    Join 'child' onto 'base' with a single "/" separator.

    Any trailing slashes on 'base' are dropped before joining, so that
    join_path('/trunk/', 'a') gives '/trunk/a' rather than '/trunk//a'.
    When 'child' is empty, 'base' is returned unchanged.
    """
    if not child:
        return base
    # str.rstrip() returns a new string; its result must be used
    return base.rstrip('/') + "/" + child
410
def find_svn_ancestors(svn_repos_url, start_path, start_rev, stop_base_path=None, prefix=""):
    """
    Given an initial starting path+rev, walk the SVN history backwards to inspect the
    ancestry of that path, optionally seeing if it traces back to stop_base_path.

    Build an array of copyfrom_path and copyfrom_revision pairs for each of the "svn copy"'s.
    If we find a copyfrom_path which stop_base_path is a substring match of (e.g. we crawled
    back to the initial branch-copy from trunk), then return the collection of ancestor
    paths. Otherwise, copyfrom_path has no ancestry compared to stop_base_path.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'svn_repos_url' is the full URL to the root of the SVN repository,
      e.g. 'file:///path/to/repo'
    'start_path' is the path in the SVN repo to the source path to start checking
      ancestry at, e.g. '/branches/fix1/projectA/file1.txt'.
    'start_rev' is the revision to start walking the history of start_path backwards from.
    'stop_base_path' is the path in the SVN repo to stop tracing ancestry once we've reached,
      i.e. the target path we're trying to trace ancestry back to, e.g. '/trunk'.
    'prefix' is a string prepended to every debug status line emitted here.

    Returns a list of dicts with keys 'path', 'revision', 'copyfrom_path' and
    'copyfrom_rev', in the order the copies were discovered (newest first). The
    list is empty if no copy-from was found, or if stop_base_path was given and
    the chain ended in a delete or a plain (non-copy) add/replace.

    Raises UnsupportedSVNAction if a changed-path has an action code that is
    not in _valid_svn_actions.
    """
    ui.status(prefix + ">> find_svn_ancestors: Start: (%s) start_path: %s stop_base_path: %s",
        svn_repos_url, start_path+"@"+str(start_rev), stop_base_path, level=ui.DEBUG, color='YELLOW')
    done = False
    no_ancestry = False
    cur_path = start_path
    cur_rev = start_rev
    first_iter_done = False
    ancestors = []
    # NOTE(review): if every matching changed-path has action "M", the inner loop
    # below neither sets done nor moves cur_path/cur_rev - presumably that cannot
    # happen for the first log entry of a path; confirm.
    while not done:
        # Get the first "svn log" entry for cur_path (relative to @cur_rev)
        ui.status(prefix + ">> find_svn_ancestors: %s", svn_repos_url+cur_path+"@"+str(cur_rev), level=ui.DEBUG, color='YELLOW')
        log_entry = svnclient.get_first_svn_log_entry(svn_repos_url+cur_path, 1, cur_rev)
        if not log_entry:
            ui.status(prefix + ">> find_svn_ancestors: Done: no log_entry", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        # If we found a copy-from case which matches our stop_base_path, we're done.
        # ...but only if we've at least tried to search for the first copy-from path.
        if stop_base_path is not None and first_iter_done and is_child_path(cur_path, stop_base_path):
            ui.status(prefix + ">> find_svn_ancestors: Done: Found is_child_path(cur_path, stop_base_path) and first_iter_done=True", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        first_iter_done = True
        # Search for any actions on our target path (or parent paths).
        changed_paths_temp = []
        for d in log_entry['changed_paths']:
            path = d['path']
            if is_child_path(cur_path, path):
                changed_paths_temp.append({'path': path, 'data': d})
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the ancestry-chain.
            ui.status(prefix + ">> find_svn_ancestors: Done: No matching changed_paths", level=ui.DEBUG, color='YELLOW')
            done = True
            continue
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
        # Find the action for our cur_path in this revision. Use a loop to check in reverse order,
        # so that if the target file/folder is "M" but has a parent folder with an "A" copy-from
        # then we still correctly match the deepest copy-from.
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            action = d['action']
            if action not in _valid_svn_actions:
                raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                    % (log_entry['revision'], action))
            ui.status(prefix + "> %s %s%s", action, path,
                (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                level=ui.DEBUG, color='YELLOW')
            if action == 'D':
                # If file/folder was deleted, ancestry-chain stops here
                if stop_base_path:
                    no_ancestry = True
                ui.status(prefix + ">> find_svn_ancestors: Done: deleted", level=ui.DEBUG, color='YELLOW')
                done = True
                break
            if action in 'RA':
                # If file/folder was added/replaced but not a copy, ancestry-chain stops here
                if not d['copyfrom_path']:
                    if stop_base_path:
                        no_ancestry = True
                    ui.status(prefix + ">> find_svn_ancestors: Done: %s with no copyfrom_path",
                        "Added" if action == "A" else "Replaced",
                        level=ui.DEBUG, color='YELLOW')
                    done = True
                    break
                # Else, file/folder was added/replaced and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                ui.status(prefix + ">> find_svn_ancestors: Found copy-from (action=%s): %s --> %s",
                    action, path, d['copyfrom_path']+"@"+str(d['copyfrom_revision']),
                    level=ui.DEBUG, color='YELLOW')
                # NOTE(review): str.replace() substitutes every occurrence of d['path'] in
                # cur_path, not only the leading prefix - confirm paths with repeated
                # segments cannot be mis-rewritten.
                ancestors.append({'path': cur_path, 'revision': log_entry['revision'],
                    'copyfrom_path': cur_path.replace(d['path'], d['copyfrom_path']), 'copyfrom_rev': d['copyfrom_revision']})
                cur_path = cur_path.replace(d['path'], d['copyfrom_path'])
                cur_rev = d['copyfrom_revision']
                # Follow the copy and keep on searching
                break
    if stop_base_path and no_ancestry:
        # If we're tracing back ancestry to a specific target stop_base_path and
        # the ancestry-chain stopped before we reached stop_base_path, then return
        # nothing since there is no ancestry chaining back to that target.
        ancestors = []
    if ancestors:
        # Only build the aligned debug listing when it will actually be displayed
        if ui.get_level() >= ui.DEBUG:
            max_len = 0
            for idx in range(len(ancestors)):
                d = ancestors[idx]
                max_len = max(max_len, len(d['path']+"@"+str(d['revision'])))
            ui.status(prefix + ">> find_svn_ancestors: Found parent ancestors:", level=ui.DEBUG, color='YELLOW_B')
            for idx in range(len(ancestors)):
                d = ancestors[idx]
                ui.status(prefix + " [%s] %s --> %s", idx,
                    str(d['path']+"@"+str(d['revision'])).ljust(max_len),
                    str(d['copyfrom_path']+"@"+str(d['copyfrom_rev'])),
                    level=ui.DEBUG, color='YELLOW')
    else:
        ui.status(prefix + ">> find_svn_ancestors: No ancestor-chain found: %s",
            svn_repos_url+start_path+"@"+str(start_rev), level=ui.DEBUG, color='YELLOW')
    return ancestors
532
def get_rev_map(source_rev, prefix):
    """
    Look up the target-repo revision that corresponds to a source-repo revision.

    Walks downward from source_rev towards 1 and returns the mapped target
    revision (as an int) for the first source revision found in rev_map.
    Returns None when no revision at or below source_rev has been mapped.
    'prefix' is prepended to the debug status lines.
    """
    ui.status(prefix + ">> get_rev_map(%s)", source_rev, level=ui.DEBUG, color='GREEN')
    # Find the highest entry less-than-or-equal-to source_rev
    candidate = int(source_rev)
    while candidate > 0:
        known = candidate in rev_map
        ui.status(prefix + ">> get_rev_map: rev=%s in_rev_map=%s", candidate, str(known), level=ui.DEBUG, color='BLACK_B')
        if known:
            return int(rev_map[candidate])
        candidate -= 1
    # Fell off the bottom of the rev_map without finding anything
    return None
546
def set_rev_map(source_rev, target_rev):
    """
    Record in rev_map that source revision 'source_rev' was replayed as target
    revision 'target_rev'. Both values are stored as ints.
    """
    global rev_map
    # Convert the value before the key, matching assignment evaluation order
    mapped_rev = int(target_rev)
    key_rev = int(source_rev)
    rev_map[key_rev] = mapped_rev
551
def build_rev_map(target_url, target_end_rev, source_info):
    """
    Check for any already-replayed history from source_url (source_info) and
    build the mapping-table of source_rev -> target_rev.

    'target_url' is the target path whose log is scanned, from revision 1 up
    to 'target_end_rev'.
    'source_info' is a dict describing the source; the 'repos_uuid' and 'url'
    keys are compared against each commit's svn2svn:* tracking revprops.

    Resets the module-level rev_map and fills it via set_rev_map() for every
    target commit whose tracking revprops match the source. Commits that carry
    only some of the svn2svn:* tracking revprops are ignored.
    """
    global rev_map
    rev_map = {}
    ui.status("Rebuilding target_rev -> source_rev rev_map...", level=ui.VERBOSE)
    tracking_keys = ('svn2svn:source_uuid', 'svn2svn:source_url', 'svn2svn:source_rev')
    proc_count = 0
    it_log_entries = svnclient.iter_svn_log_entries(target_url, 1, target_end_rev, get_changed_paths=False, get_revprops=True)
    for log_entry in it_log_entries:
        if log_entry['revprops']:
            revprops = {}
            for v in log_entry['revprops']:
                if v['name'].startswith('svn2svn:'):
                    revprops[v['name']] = v['value']
            # Require the full set of tracking revprops, so that a partially
            # tagged commit is skipped rather than raising KeyError.
            if all(key in revprops for key in tracking_keys) and \
               revprops['svn2svn:source_uuid'] == source_info['repos_uuid'] and \
               revprops['svn2svn:source_url'] == source_info['url']:
                set_rev_map(revprops['svn2svn:source_rev'], log_entry['revision'])
        proc_count += 1
        if proc_count % 500 == 0:
            # Report the revision currently being processed. A variable bound
            # only on a match would be unbound or stale here.
            ui.status("...processed %s (%s of %s)..." % (proc_count, log_entry['revision'], target_end_rev), level=ui.VERBOSE)
577
def get_svn_dirlist(svn_path, rev_number = ""):
    """
    Return the lines printed by "svn list" for the given folder path.

    When 'rev_number' is given, the listing is taken at that revision (passed
    both as "-r" and as a peg revision on the path). The command is run with
    no_fail=True; an output of at most one character yields an empty list.
    """
    target = svn_path
    cmd = ["list"]
    if rev_number:
        cmd.extend(["-r", rev_number])
        target = target + "@" + str(rev_number)
    cmd.append(target)
    listing = run_svn(cmd, no_fail=True)
    if len(listing) > 1:
        return listing.strip("\n").split("\n")
    return []
591
def path_in_list(paths, path):
    """
    Return True if 'path' equals, or is a child of, any entry in 'paths'.
    """
    return any(is_child_path(path, candidate) for candidate in paths)
597
def add_path(paths, path):
    """
    Append 'path' to the 'paths' list, unless the list already holds that path
    or one of its parent paths.
    """
    if path_in_list(paths, path):
        return
    paths.append(path)
601
def in_ancestors(ancestors, ancestor):
    """
    Check 'ancestor' against the 'ancestors' chain.

    Scanning from the last entry of 'ancestors' backwards, the first entry
    whose revision is greater than ancestor['revision'] decides the result:
    True if ancestor['path'] equals or is a child of that entry's path,
    False otherwise. Returns True when no such entry is found.
    """
    # NOTE(review): the entry at index 0 is never examined - confirm that
    # this is intentional.
    for entry in reversed(ancestors[1:]):
        if int(entry['revision']) > ancestor['revision']:
            return is_child_path(ancestor['path'], entry['path'])
    return True
609
610 def do_svn_add(source_url, path_offset, source_rev, source_ancestors, \
611 parent_copyfrom_path="", parent_copyfrom_rev="", \
612 export_paths={}, is_dir = False, skip_paths=[], prefix = ""):
613 """
614 Given the add'd source path, replay the "svn add/copy" commands to correctly
615 track renames across copy-from's.
616
617 For example, consider a sequence of events like this:
618 1. svn copy /trunk /branches/fix1
619 2. (Make some changes on /branches/fix1)
620 3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2 " Rename folder
621 4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt " Rename file inside renamed folder
622 5. svn co /trunk && svn merge /branches/fix1
623 After the merge and commit, "svn log -v" with show a delete of /trunk/Proj1
624 and and add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
625 to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
626 we'd lose the logical history that Proj2/file2.txt is really a descendant
627 of Proj1/file1.txt.
628
629 'path_offset' is the offset from source_base to the file to check ancestry for,
630 e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
631 'source_rev' is the revision ("svn log") that we're processing from the source repo.
632 'parent_copyfrom_path' and 'parent_copyfrom_rev' is the copy-from path of the parent
633 directory, when being called recursively by do_svn_add_dir().
634 'export_paths' is the list of path_offset's that we've deferred running "svn export" on.
635 'is_dir' is whether path_offset is a directory (rather than a file).
636 """
637 source_base = source_url[len(source_repos_url):] # e.g. '/trunk'
638 ui.status(prefix + ">> do_svn_add: %s %s", join_path(source_base, path_offset)+"@"+str(source_rev),
639 " (parent-copyfrom: "+parent_copyfrom_path+"@"+str(parent_copyfrom_rev)+")" if parent_copyfrom_path else "",
640 level=ui.DEBUG, color='GREEN')
641 # Check if the given path has ancestors which chain back to the current source_base
642 found_ancestor = False
643 ancestors = find_svn_ancestors(source_repos_url, join_path(source_base, path_offset), source_rev, stop_base_path=source_base, prefix=prefix+" ")
644 ancestor = ancestors[len(ancestors)-1] if ancestors else None # Choose the eldest ancestor, i.e. where we reached stop_base_path=source_base
645 if ancestor and not in_ancestors(source_ancestors, ancestor):
646 ancestor = None
647 copyfrom_path = ancestor['copyfrom_path'] if ancestor else ""
648 copyfrom_rev = ancestor['copyfrom_rev'] if ancestor else ""
649 if ancestor:
650 # The copy-from path has ancestry back to source_url.
651 ui.status(prefix + ">> do_svn_add: Check copy-from: Found parent: %s", copyfrom_path+"@"+str(copyfrom_rev),
652 level=ui.DEBUG, color='GREEN', bold=True)
653 found_ancestor = True
654 # Map the copyfrom_rev (source repo) to the equivalent target repo rev #. This can
655 # return None in the case where copyfrom_rev is *before* our source_start_rev.
656 tgt_rev = get_rev_map(copyfrom_rev, prefix+" ")
657 ui.status(prefix + ">> do_svn_add: get_rev_map: %s (source) -> %s (target)", copyfrom_rev, tgt_rev, level=ui.DEBUG, color='GREEN')
658 else:
659 ui.status(prefix + ">> do_svn_add: Check copy-from: No ancestor chain found.", level=ui.DEBUG, color='GREEN')
660 found_ancestor = False
661 if found_ancestor and tgt_rev:
662 # Check if this path_offset in the target WC already has this ancestry, in which
663 # case there's no need to run the "svn copy" (again).
664 path_in_svn = in_svn(path_offset, prefix=prefix+" ")
665 log_entry = svnclient.get_last_svn_log_entry(path_offset, 1, 'HEAD', get_changed_paths=False) if in_svn(path_offset, require_in_repo=True, prefix=prefix+" ") else []
666 if (not log_entry or (log_entry['revision'] != tgt_rev)):
667 copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
668 ui.status(prefix + ">> do_svn_add: svn_copy: Copy-from: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
669 ui.status(prefix + " copyfrom: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
670 ui.status(prefix + " p_copyfrom: %s", parent_copyfrom_path+"@"+str(parent_copyfrom_rev) if parent_copyfrom_path else "", level=ui.DEBUG, color='GREEN')
671 if path_in_svn and \
672 ((parent_copyfrom_path and is_child_path(copyfrom_path, parent_copyfrom_path)) and \
673 (parent_copyfrom_rev and copyfrom_rev == parent_copyfrom_rev)):
674 # When being called recursively, if this child entry has the same ancestor as the
675 # the parent, then no need to try to run another "svn copy".
676 ui.status(prefix + ">> do_svn_add: svn_copy: Same ancestry as parent: %s",
677 parent_copyfrom_path+"@"+str(parent_copyfrom_rev),level=ui.DEBUG, color='GREEN')
678 pass
679 else:
680 # Copy this path from the equivalent path+rev in the target repo, to create the
681 # equivalent history.
682 if parent_copyfrom_path:
683 # If we have a parent copy-from path, we mis-match that so display a status
684 # message describing the action we're mimic'ing. If path_in_svn, then this
685 # is logically a "replace" rather than an "add".
686 ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), join_path(source_base, path_offset), ancestors[0]['copyfrom_path']+"@"+str(copyfrom_rev), level=ui.VERBOSE)
687 if path_in_svn:
688 # If local file is already under version-control, then this is a replace.
689 ui.status(prefix + ">> do_svn_add: pre-copy: local path already exists: %s", path_offset, level=ui.DEBUG, color='GREEN')
690 run_svn(["update", path_offset])
691 run_svn(["remove", "--force", path_offset])
692 run_svn(["copy", "-r", tgt_rev, join_path(target_url, copyfrom_offset)+"@"+str(tgt_rev), path_offset])
693 if is_dir:
694 # Export the final verison of all files in this folder.
695 add_path(export_paths, path_offset)
696 else:
697 # Export the final verison of this file.
698 run_svn(["export", "--force", "-r", source_rev,
699 source_repos_url+join_path(source_base, path_offset)+"@"+str(source_rev), path_offset])
700 if options.keep_prop:
701 sync_svn_props(source_url, source_rev, path_offset)
702 else:
703 ui.status(prefix + ">> do_svn_add: Skipped 'svn copy': %s", path_offset, level=ui.DEBUG, color='GREEN')
704 else:
705 # Else, either this copy-from path has no ancestry back to source_url OR copyfrom_rev comes
706 # before our initial source_start_rev (i.e. tgt_rev == None), so can't do a "svn copy".
707 # Create (parent) directory if needed.
708 # TODO: This is (nearly) a duplicate of code in process_svn_log_entry(). Should this be
709 # split-out to a shared tag?
710 p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or None
711 if p_path and not os.path.exists(p_path):
712 run_svn(["mkdir", p_path])
713 if not in_svn(path_offset, prefix=prefix+" "):
714 if is_dir:
715 # Export the final verison of all files in this folder.
716 add_path(export_paths, path_offset)
717 else:
718 # Export the final verison of this file. We *need* to do this before running
719 # the "svn add", even if we end-up re-exporting this file again via export_paths.
720 run_svn(["export", "--force", "-r", source_rev,
721 source_repos_url+join_path(source_base, path_offset)+"@"+str(source_rev), path_offset])
722 # If not already under version-control, then "svn add" this file/folder.
723 run_svn(["add", "--parents", path_offset])
724 if options.keep_prop:
725 sync_svn_props(source_url, source_rev, path_offset)
726 if is_dir:
727 # For any folders that we process, process any child contents, so that we correctly
728 # replay copies/replaces/etc.
729 do_svn_add_dir(source_url, path_offset, source_rev, source_ancestors,
730 copyfrom_path, copyfrom_rev, export_paths, skip_paths, prefix+" ")
731
def do_svn_add_dir(source_url, path_offset, source_rev, source_ancestors,
                   parent_copyfrom_path, parent_copyfrom_rev,
                   export_paths, skip_paths, prefix=""):
    """
    Bring the working-copy directory 'path_offset' in line with the same
    directory in the source repo at 'source_rev'.

    Each entry listed for the remote directory is handed to do_svn_add()
    (unless its working path is in 'skip_paths'), and each entry listed
    locally but not remotely is "svn update"'d and then "svn remove"'d.

    'parent_copyfrom_path', 'parent_copyfrom_rev', 'source_ancestors' and
    'export_paths' are passed straight through to do_svn_add().
    'prefix' is prepended to the debug status messages.
    """
    # Relative path of source_url inside the source repo, e.g. '/trunk'
    repo_base = source_url[len(source_repos_url):]

    def to_working_path(entry, entry_is_dir):
        # Directory entries carry a trailing slash which must not end up in the
        # working path; the result is relative (no leading slash).
        entry_name = entry.rstrip('/') if entry_is_dir else entry
        return join_path(path_offset, entry_name).lstrip('/')

    # Get the directory contents, to compare between the local WC (target_url)
    # vs. the remote repo (source_url).
    # TODO: The local listing won't include add'd paths because "svn ls" lists the
    #       contents of the associated remote repo folder. (Is this a problem?)
    local_entries = get_svn_dirlist(path_offset)
    remote_entries = get_svn_dirlist(join_path(source_url, path_offset), source_rev)
    ui.status(prefix + ">> do_svn_add_dir: paths_local: %s", str(local_entries), level=ui.DEBUG, color='GREEN')
    ui.status(prefix + ">> do_svn_add_dir: paths_remote: %s", str(remote_entries), level=ui.DEBUG, color='GREEN')

    # Update files/folders which exist in remote but not local
    for entry in remote_entries:
        entry_is_dir = (entry[-1] == "/")
        child_path = to_working_path(entry, entry_is_dir)
        if child_path in skip_paths:
            continue
        do_svn_add(source_url, child_path, source_rev, source_ancestors,
                   parent_copyfrom_path, parent_copyfrom_rev,
                   export_paths, entry_is_dir, skip_paths, prefix+" ")

    # Remove files/folders which exist in local but not remote
    for entry in local_entries:
        if entry in remote_entries:
            continue
        entry_is_dir = (entry[-1] == "/")
        child_path = to_working_path(entry, entry_is_dir)
        ui.status(" %s %s", 'D', join_path(repo_base, child_path), level=ui.VERBOSE)
        run_svn(["update", child_path])
        run_svn(["remove", "--force", child_path])
        # TODO: Does this handle deleted folders too? Wouldn't want to have a case
        #       where we only delete all files from folder but leave orphaned folder around.
762
def process_svn_log_entry(log_entry, ancestors, commit_paths, prefix=""):
    """
    Process SVN changes from the given log entry. Build an array (commit_paths)
    of the paths in the working-copy that were changed, i.e. the paths which
    we'll pass to "svn commit".

    'log_entry' is a dict; its 'revision', 'url' and 'changed_paths' keys are
    read here. Each changed-path dict is read for 'path', 'kind', 'action',
    'copyfrom_path' and 'copyfrom_revision' (and 'kind' is filled-in in place
    when the log didn't supply it).
    'ancestors' is passed through to do_svn_add() for copy-from'd adds.
    'commit_paths' is appended to in place (offsets relative to source_base).
    'prefix' is prepended to the debug status messages.

    Raises UnsupportedSVNAction if a changed-path has an action which is not
    exactly one of the known single-letter SVN actions.
    """
    export_paths = []
    source_rev = log_entry['revision']
    source_url = log_entry['url']
    source_base = source_url[len(source_repos_url):]  # e.g. '/trunk'
    ui.status(prefix + ">> process_svn_log_entry: %s", source_url+"@"+str(source_rev), level=ui.DEBUG, color='GREEN')
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not is_child_path(path, source_base):
            # Ignore changed files that are not part of this subdir
            ui.status(prefix + ">> process_svn_log_entry: Unrelated path: %s (base: %s)", path, source_base, level=ui.DEBUG, color='GREEN')
            continue
        if d['kind'] == "" or d['kind'] == 'none':
            # The "kind" value was introduced in SVN 1.6, and "svn log --xml" won't return a "kind"
            # value for commits made on a pre-1.6 repo, even if the server is now running 1.6.
            # We need to use other methods to fetch the node-kind for these cases.
            d['kind'] = svnclient.get_kind(source_repos_url, path, source_rev, d['action'], log_entry['changed_paths'])
        assert (d['kind'] == 'file') or (d['kind'] == 'dir')
        path_is_dir = (d['kind'] == 'dir')
        path_is_file = (d['kind'] == 'file')
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path. _valid_svn_actions is a plain string, so a bare
        # "in" test is a *substring* test which would accept '' or multi-letter values;
        # require exactly one known letter.
        action = d['action']
        if len(action) != 1 or action not in _valid_svn_actions:
            # "%s" (not "%d") so that a non-integer revision value can't mask this error.
            raise UnsupportedSVNAction("In SVN rev. %s: action '%s' not supported. Please report a bug!"
                % (source_rev, action))
        ui.status(" %s %s%s", action, d['path'],
            (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
            level=ui.VERBOSE)

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        commit_paths.append(path_offset)

        # Special-handling for replace's
        if action == 'R':
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            if path_offset and in_svn(path_offset):
                # Target path might not be under version-control yet, e.g. parent "add"
                # was a copy-from a branch which had no ancestry back to trunk, and each
                # child folder under that parent folder is a "replace" action on the final
                # merge to trunk. Since the child folders will be in skip_paths, do_svn_add
                # wouldn't have created them while processing the parent "add" path.
                if path_is_dir:
                    # Need to "svn update" before "svn remove" in case child contents are at
                    # a higher rev than the (parent) path_offset.
                    run_svn(["update", path_offset])
                run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            # Handle cases where this "add" was a copy from another URL in the source repo
            if d['copyfrom_revision']:
                skip_paths = []
                for tmp_d in log_entry['changed_paths']:
                    tmp_path = tmp_d['path']
                    if is_child_path(tmp_path, path) and tmp_d['action'] in ('A', 'R', 'D'):
                        # Build list of child entries which are also in the changed_paths list,
                        # so that do_svn_add() can skip processing these entries when recursing
                        # since we'll end-up processing them later. Don't include action="M" paths
                        # in this list because it's non-conclusive: it could just mean that the
                        # file was modified *after* the copy-from, so we still want do_svn_add()
                        # to re-create the correct ancestry.
                        tmp_path_offset = tmp_path[len(source_base):].strip("/")
                        skip_paths.append(tmp_path_offset)
                do_svn_add(source_url, path_offset, source_rev, ancestors, "", "", export_paths, path_is_dir, skip_paths, prefix+" ")
            # Else just "svn export" the files from the source repo and "svn add" them.
            else:
                # Create (parent) directory if needed
                p_path = path_offset if path_is_dir else os.path.dirname(path_offset).strip() or None
                if p_path and not os.path.exists(p_path):
                    run_svn(["mkdir", p_path])
                # Export the entire added tree.
                if path_is_dir:
                    # For directories, defer the (recursive) "svn export". Might have a
                    # situation in a branch merge where the entry in the svn-log is a
                    # non-copy-from'd "add" but there are child contents (that we haven't
                    # gotten to yet in log_entry) that are copy-from's. When we try do
                    # the "svn copy" later on in do_svn_add() for those copy-from'd paths,
                    # having pre-existing (svn-add'd) contents creates some trouble.
                    # Instead, just create the stub folders ("svn mkdir" above) and defer
                    # exporting the final file-state until the end.
                    add_path(export_paths, path_offset)
                else:
                    # Export the final version of this file. We *need* to do this before running
                    # the "svn add", even if we end-up re-exporting this file again via export_paths.
                    run_svn(["export", "--force", "-r", source_rev,
                             join_path(source_url, path_offset)+"@"+str(source_rev), path_offset])
                if not in_svn(path_offset, prefix=prefix+" "):
                    # Need to use in_svn here to handle cases where client committed the parent
                    # folder and each indiv sub-folder.
                    run_svn(["add", "--parents", path_offset])
                if options.keep_prop:
                    sync_svn_props(source_url, source_rev, path_offset)

        elif action == 'D':
            if path_is_dir:
                # For dirs, need to "svn update" before "svn remove" because the final
                # "svn commit" will fail if the parent (path_offset) is at a lower rev
                # than any of the child contents. This needs to be a recursive update.
                run_svn(["update", path_offset])
            run_svn(["remove", "--force", path_offset])

        elif action == 'M':
            if path_is_file:
                run_svn(["export", "--force", "-N" , "-r", source_rev,
                         join_path(source_url, path_offset)+"@"+str(source_rev), path_offset])
            if path_is_dir:
                # For dirs, need to "svn update" before export/prop-sync because the
                # final "svn commit" will fail if the parent is at a lower rev than
                # child contents. Just need to update the rev-state of the dir (d['path']),
                # don't need to recursively update all child contents.
                # (??? is this the right reason?)
                run_svn(["update", "-N", path_offset])
            if options.keep_prop:
                sync_svn_props(source_url, source_rev, path_offset)

        else:
            raise InternalError("Internal Error: process_svn_log_entry: Unhandled 'action' value: '%s'"
                % action)

    # Export the final version of all add'd paths from source_url
    for path_offset in export_paths:
        run_svn(["export", "--force", "-r", source_rev,
                 join_path(source_url, path_offset)+"@"+str(source_rev), path_offset])
909
def keep_revnum(source_rev, target_rev_last, wc_target_tmp):
    """
    Add "padding" target revisions as needed to keep source and target
    revision #'s identical.

    'source_rev' is the source revision about to be replayed.
    'target_rev_last' is the last revision # in the target repo.
    'wc_target_tmp' is the path of a scratch working-copy; any existing
    directory there is deleted, and the scratch checkout is removed again
    before returning (even if one of the svn commands fails).

    Returns the new last target revision: 'target_rev_last' unchanged when no
    padding was needed, otherwise the last padding revision committed.

    Raises InternalError if source_rev is not higher than target_rev_last, or
    if a padding commit doesn't produce the expected revision number.
    """
    source_rev_num = int(source_rev)
    if source_rev_num <= int(target_rev_last):
        raise InternalError("keep-revnum mode is enabled, "
            "but source revision (r%s) is less-than-or-equal last target revision (r%s)" % \
            (source_rev, target_rev_last))
    if int(target_rev_last) >= source_rev_num - 1:
        # The next target commit will already land on source_rev; nothing to pad.
        return target_rev_last

    # Add "padding" target revisions to keep source and target rev #'s identical
    if os.path.exists(wc_target_tmp):
        shutil.rmtree(wc_target_tmp)
    try:
        run_svn(["checkout", "-r", "HEAD", "--depth=empty", target_repos_url, wc_target_tmp])
        for rev_num in range(int(target_rev_last)+1, source_rev_num):
            # Each padding commit just changes a property on the root of the target repo.
            run_svn(["propset", "svn2svn:keep-revnum", rev_num, wc_target_tmp])
            output = run_svn(["commit", "-m", "", wc_target_tmp])
            rev_num_tmp = parse_svn_commit_rev(output) if output else None
            if rev_num_tmp != rev_num:
                # Explicit check (rather than "assert") so it still fires under "python -O".
                raise InternalError("keep-revnum: expected padding commit to create r%s, but got r%s" % \
                    (rev_num, rev_num_tmp))
            ui.status("Committed revision %s (keep-revnum).", rev_num)
            target_rev_last = rev_num
    finally:
        # Don't leave the scratch working-copy behind, even on failure.
        if os.path.exists(wc_target_tmp):
            shutil.rmtree(wc_target_tmp)
    return target_rev_last
933
def disp_svn_log_summary(log_entry):
    """
    Display an "svn log"-style summary for the given log entry: a separator
    line, a "rREV | author | date" header line, and then the commit message.

    'log_entry' is a dict; its 'revision', 'author', 'date' and 'message'
    keys are read. 'date' is converted with int() and treated as a timestamp.
    """
    ui.status("------------------------------------------------------------------------")
    commit_time = datetime.fromtimestamp(int(log_entry['date']))
    header_fields = (log_entry['revision'], log_entry['author'], str(commit_time.isoformat(' ')))
    ui.status("r%s | %s | %s", *header_fields)
    ui.status(log_entry['message'])
941
def real_main(args, parser):
    """
    Replay history from the source URL into the target URL.

    'args' is the list of positional arguments; the first two entries (source_url
    then target_url) are popped off it. 'parser' is the option parser, used only
    for reporting usage errors via parser.error().

    Reads the module-level 'options' and initializes the module-level source_*/
    target_* URL variables. Works inside a '_wc_target' working-copy created
    under the current directory (and changes the current directory into it).

    Unless running in continue-mode, first does an initial import of the first
    matching source revision; then replays each following source revision.
    Errors (and Ctrl-C) while replaying are reported on stdout and followed by
    an "svn cleanup" + revert of the working copy; they are not re-raised.
    """
    global source_url, target_url, rev_map
    global source_repos_url, source_base, source_repos_uuid
    global target_repos_url, target_base
    source_url = args.pop(0).rstrip("/")  # e.g. 'http://server/svn/source/trunk'
    target_url = args.pop(0).rstrip("/")  # e.g. 'file:///svn/target/trunk'
    ui.status("options: %s", str(options), level=ui.DEBUG, color='GREEN')

    # Make sure that both the source and target URL's are valid
    source_info = svnclient.get_svn_info(source_url)
    assert is_child_path(source_url, source_info['repos_url'])
    target_info = svnclient.get_svn_info(target_url)
    assert is_child_path(target_url, target_info['repos_url'])

    # Init global vars
    source_repos_url = source_info['repos_url']       # e.g. 'http://server/svn/source'
    source_base = source_url[len(source_repos_url):]  # e.g. '/trunk'
    source_repos_uuid = source_info['repos_uuid']
    target_repos_url = target_info['repos_url']       # e.g. 'http://server/svn/target'
    target_base = target_url[len(target_repos_url):]  # e.g. '/trunk'

    # Init start and end revision
    try:
        source_start_rev = svnclient.get_svn_rev(source_repos_url, options.rev_start if options.rev_start else 1)
    except ExternalCommandFailed:
        parser.error("invalid start source revision value: %s" % (options.rev_start))
    try:
        source_end_rev = svnclient.get_svn_rev(source_repos_url, options.rev_end if options.rev_end else "HEAD")
    except ExternalCommandFailed:
        parser.error("invalid end source revision value: %s" % (options.rev_end))
    ui.status("Using source revision range %s:%s", source_start_rev, source_end_rev, level=ui.VERBOSE)

    # TODO: If options.keep_date, should we try doing a "svn propset" on an *existing* revision
    #       as a sanity check, so we check if the pre-revprop-change hook script is correctly setup
    #       before doing first replay-commit?

    target_rev_last = target_info['revision']  # Last revision # in the target repo
    wc_target = os.path.abspath('_wc_target')
    wc_target_tmp = os.path.abspath('_tmp_wc_target')
    num_entries_proc = 0
    commit_count = 0
    source_rev = None
    target_rev = None

    # Check out a working copy of target_url if needed
    wc_exists = os.path.exists(wc_target)
    if wc_exists and not options.cont_from_break:
        shutil.rmtree(wc_target)
        wc_exists = False
    if not wc_exists:
        ui.status("Checking-out _wc_target...", level=ui.VERBOSE)
        svnclient.svn_checkout(target_url, wc_target)
    os.chdir(wc_target)
    if wc_exists:
        # If using an existing WC, make sure it's clean ("svn revert")
        ui.status("Cleaning-up _wc_target...", level=ui.VERBOSE)
        run_svn(["cleanup"])
        full_svn_revert()

    if not options.cont_from_break:
        # TODO: Warn user if trying to start (non-continue) into a non-empty target path?
        # Get the first log entry at/after source_start_rev, which is where
        # we'll do the initial import from.
        source_ancestors = find_svn_ancestors(source_repos_url, source_base, source_end_rev, prefix=" ")
        it_log_start = svnclient.iter_svn_log_entries(source_url, source_start_rev, source_end_rev, get_changed_paths=False, ancestors=source_ancestors)
        source_start_log = None
        for log_entry in it_log_start:
            # Pick the first entry. Need to use a "for ..." loop since we're using an iterator.
            source_start_log = log_entry
            break
        if not source_start_log:
            raise InternalError("Unable to find any matching revisions between %s:%s in source_url: %s" % \
                (source_start_rev, source_end_rev, source_url))

        # This is the revision we will start from for source_url
        source_start_rev = int(source_start_log['revision'])
        ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE)
        ui.status("")
        # NOTE: source_rev is still None at this point, so the padding check must be
        # made against source_start_rev (the revision about to be imported).
        if options.keep_revnum and source_start_rev > int(target_rev_last):
            target_rev_last = keep_revnum(source_start_rev, target_rev_last, wc_target_tmp)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_repos_url, source_start_rev, source_start_rev))
        # Export and add file-contents from source_url@source_start_rev
        source_start_url = source_url if not source_ancestors else source_repos_url+source_ancestors[len(source_ancestors)-1]['copyfrom_path']
        top_paths = run_svn(["list", "-r", source_start_rev, source_start_url+"@"+str(source_start_rev)])
        top_paths = top_paths.strip("\n").split("\n")
        for path in top_paths:
            # For each top-level file/folder...
            if not path:
                continue
            # Directories have a trailing slash in the "svn list" output
            path_is_dir = (path[-1] == "/")
            path_offset = path.rstrip('/') if path_is_dir else path
            if in_svn(path_offset, prefix=" "):
                raise InternalError("Cannot replay history on top of pre-existing structure: %s" % join_path(source_start_url, path_offset))
            if path_is_dir and not os.path.exists(path_offset):
                os.makedirs(path_offset)
            run_svn(["export", "--force", "-r" , source_start_rev, join_path(source_start_url, path_offset)+"@"+str(source_start_rev), path_offset])
            run_svn(["add", path_offset])
        # Update any properties on the newly added content
        paths = run_svn(["list", "--recursive", "-r", source_start_rev, source_start_url+"@"+str(source_start_rev)])
        paths = paths.strip("\n").split("\n")
        if options.keep_prop:
            sync_svn_props(source_start_url, source_start_rev, "")
        for path in paths:
            if not path:
                continue
            # Directories have a trailing slash in the "svn list" output
            path_is_dir = (path[-1] == "/")
            path_offset = path.rstrip('/') if path_is_dir else path
            ui.status(" A %s", join_path(source_base, path_offset), level=ui.VERBOSE)
            if options.keep_prop:
                sync_svn_props(source_start_url, source_start_rev, path_offset)
        # Commit the initial import
        num_entries_proc += 1
        target_revprops = gen_tracking_revprops(source_start_rev)  # Build source-tracking revprop's
        target_rev = commit_from_svn_log_entry(source_start_log, target_revprops=target_revprops)
        if target_rev:
            # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
            set_rev_map(source_start_rev, target_rev)
            commit_count += 1
            target_rev_last = target_rev
            if options.verify:
                # Verify against the revision just imported (source_rev is still None here).
                verify_commit(source_start_rev, target_rev_last)
    else:
        # Re-build the rev_map based on any already-replayed history in target_url
        build_rev_map(target_url, target_rev_last, source_info)
        if not rev_map:
            parser.error("called with continue-mode, but no already-replayed source history found in target_url")
        source_start_rev = int(max(rev_map, key=rev_map.get))
        assert source_start_rev
        ui.status("Continuing from source revision %s.", source_start_rev, level=ui.VERBOSE)
        ui.status("", level=ui.VERBOSE)

    if options.keep_revnum and source_start_rev < target_rev_last:
        parser.error("last target revision is equal-or-higher than starting source revision; "
                     "cannot use --keep-revnum mode")

    # Compare (major, minor) as integers. Joining them into a float would turn
    # e.g. 1.10 into 1.1, which wrongly sorts below 1.7.
    svn_vers_t = svnclient.get_svn_client_version()
    svn_has_pristines = tuple(int(part) for part in svn_vers_t[0:2]) >= (1, 7)

    # Load SVN log starting from source_start_rev + 1
    source_ancestors = find_svn_ancestors(source_repos_url, source_base, source_end_rev, prefix=" ")
    it_log_entries = svnclient.iter_svn_log_entries(source_url, source_start_rev+1, source_end_rev, get_revprops=True, ancestors=source_ancestors) if source_start_rev < source_end_rev else []
    source_rev = None

    try:
        for log_entry in it_log_entries:
            if options.entries_proc_limit:
                if num_entries_proc >= options.entries_proc_limit:
                    break
            # Replay this revision from source_url into target_url
            source_rev = log_entry['revision']
            if options.keep_revnum:
                target_rev_last = keep_revnum(source_rev, target_rev_last, wc_target_tmp)
            disp_svn_log_summary(log_entry)
            # Process all the changed-paths in this log entry
            commit_paths = []
            process_svn_log_entry(log_entry, source_ancestors, commit_paths)
            num_entries_proc += 1
            # Commit any changes made to _wc_target
            target_revprops = gen_tracking_revprops(source_rev)  # Build source-tracking revprop's
            target_rev = commit_from_svn_log_entry(log_entry, commit_paths, target_revprops=target_revprops)
            if target_rev:
                # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
                source_rev = log_entry['revision']
                set_rev_map(source_rev, target_rev)
                target_rev_last = target_rev
                commit_count += 1
                if options.verify:
                    verify_commit(source_rev, target_rev_last, log_entry)
                # Run "svn cleanup" every 100 commits if SVN 1.7+, to clean-up orphaned ".svn/pristines/*"
                if svn_has_pristines and (commit_count % 100 == 0):
                    run_svn(["cleanup"])
        if not source_rev:
            # If there were no new source_url revisions to process, init source_rev
            # for the "finally" message below to be the last source revision replayed.
            source_rev = source_start_rev
            if options.verify:
                verify_commit(source_start_rev, target_rev_last)

    except KeyboardInterrupt:
        print("\nStopped by user.")
        print("\nCleaning-up...")
        run_svn(["cleanup"])
        full_svn_revert()
    except:
        # Deliberately broad: this is the top-level boundary, and the working copy
        # must be cleaned-up and reverted no matter what went wrong.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        print("\nCleaning-up...")
        run_svn(["cleanup"])
        print(run_svn(["status"]))
        full_svn_revert()
    finally:
        print("\nFinished at source revision %s%s." % (source_rev, " (dry-run)" if options.dry_run else ""))
1141
def main():
    """
    Command-line entry point; must be callable without arguments.

    Builds the option parser, parses the command line into the module-level
    'options', normalizes a few derived settings (verbosity level, dry-run
    limit, --revision range, archive-mode flags) and then hands the two
    positional arguments to real_main(). Returns whatever real_main() returns.
    """
    global options

    usage = "".join([
        "svn2svn, version %s\n" % str(full_version),
        "<http://nynim.org/projects/svn2svn> <https://github.com/tonyduckles/svn2svn>\n\n",
        "Usage: %prog [OPTIONS] source_url target_url\n",
    ])
    description = """\
Replicate (replay) history from one SVN repository to another. Maintain
logical ancestry wherever possible, so that 'svn log' on the replayed repo
will correctly follow file/folder renames.

Examples:
Create a copy of only /trunk from source repo, starting at r5000
$ svnadmin create /svn/target
$ svn mkdir -m 'Add trunk' file:///svn/target/trunk
$ svn2svn -av -r 5000 http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target
2. The first commit to http://server/source/trunk at/after r5000 will be
exported & added into _wc_target
3. All revisions affecting http://server/source/trunk (starting at r5000)
will be replayed to _wc_target. Any add/copy/move/replaces that are
copy-from'd some path outside of /trunk (e.g. files renamed on a
/branch and branch was merged into /trunk) will correctly maintain
logical ancestry where possible.

Use continue-mode (-c) to pick-up where the last run left-off
$ svn2svn -avc http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target, if not already
checked-out
2. All new revisions affecting http://server/source/trunk starting from
the last replayed revision to file:///svn/target/trunk (based on the
svn2svn:* revprops) will be replayed to _wc_target, maintaining all
logical ancestry where possible."""
    parser = optparse.OptionParser(usage, description=description,
                                   formatter=HelpFormatter(), version="%prog "+str(full_version))

    def add_switch(short_opt, long_opt, dest, help_text):
        # Register a boolean switch which is False unless given on the command line.
        parser.add_option(short_opt, long_opt, action="store_true", dest=dest,
                          default=False, help=help_text)

    # NOTE: options are registered in the order they should appear in --help.
    parser.add_option("-v", "--verbose", dest="verbosity", action="count", default=1,
                      help="enable additional output (use -vv or -vvv for more)")
    add_switch("-a", "--archive", "archive",
               "archive/mirror mode; same as -UDP (see REQUIRE's below)\n"
               "maintain same commit author, same commit time, and file/dir properties")
    add_switch("-U", "--keep-author", "keep_author",
               "maintain same commit authors (svn:author) as source\n"
               "(REQUIRES target_url be non-auth'd, e.g. file://-based, since this uses --username to set author)")
    add_switch("-D", "--keep-date", "keep_date",
               "maintain same commit time (svn:date) as source\n"
               "(REQUIRES 'pre-revprop-change' hook script to allow 'svn:date' changes)")
    add_switch("-P", "--keep-prop", "keep_prop",
               "maintain same file/dir SVN properties as source")
    add_switch("-R", "--keep-revnum", "keep_revnum",
               "maintain same rev #'s as source. creates placeholder target "
               "revisions (by modifying a 'svn2svn:keep-revnum' property at the root of the target repo)")
    # No explicit default here: the option value is None (not False) when absent.
    parser.add_option("-c", "--continue", action="store_true", dest="cont_from_break",
                      help="continue from last source commit to target (based on svn2svn:* revprops)")
    parser.add_option("-r", "--revision", type="string", dest="revision", metavar="ARG",
                      help="revision range to replay from source_url\n"
                           "A revision argument can be one of:\n"
                           " START start rev # (end will be 'HEAD')\n"
                           " START:END start and ending rev #'s\n"
                           "Any revision # formats which SVN understands are "
                           "supported, e.g. 'HEAD', '{2010-01-31}', etc.")
    add_switch("-u", "--log-author", "log_author",
               "append source commit author to replayed commit mesages")
    add_switch("-d", "--log-date", "log_date",
               "append source commit time to replayed commit messages")
    parser.add_option("-l", "--limit", type="int", dest="entries_proc_limit", metavar="NUM",
                      help="maximum number of source revisions to process")
    add_switch("-n", "--dry-run", "dry_run",
               "process next source revision but don't commit changes to "
               "target working-copy (forces --limit=1)")
    parser.add_option("-x", "--verify", action="store_const", const=1, dest="verify",
                      help="verify ancestry and content for changed paths in commit after every target commit or last target commit")
    parser.add_option("-X", "--verify-all", action="store_const", const=2, dest="verify",
                      help="verify ancestry and content for entire target_url tree after every target commit or last target commit")
    parser.add_option("--debug", dest="verbosity", const=ui.DEBUG, action="store_const",
                      help="enable debugging output (same as -vvv)")

    options, args = parser.parse_args()
    if len(args) != 2:
        parser.error("incorrect number of arguments")

    if options.verbosity < 10:
        # Expand multiple "-v" arguments to a real ui._level value
        options.verbosity *= 10

    if options.dry_run:
        # When in dry-run mode, only try to process the next log_entry
        options.entries_proc_limit = 1

    options.rev_start = None
    options.rev_end = None
    if options.revision:
        # Reg-ex for matching a revision arg (http://svnbook.red-bean.com/en/1.5/svn.tour.revs.specifiers.html#svn.tour.revs.dates)
        rev_patt = r'[0-9A-Z]+|\{[0-9A-Za-z/\ :-]+\}'
        # First try a "start:end" match; next, try a plain "start" match.
        candidate_patterns = ('^('+rev_patt+'):('+rev_patt+')$',
                              '^('+rev_patt+')$')
        match = None
        for candidate in candidate_patterns:
            match = re.match(candidate, options.revision)
            if match is not None:
                break
        if match is None:
            parser.error("unexpected --revision argument format; see 'svn help log' for valid revision formats")
        rev = match.groups()
        options.rev_start = rev[0] if len(rev)>0 else None
        options.rev_end = rev[1] if len(rev)>1 else None

    if options.archive:
        # Archive mode is shorthand for -U -D -P.
        options.keep_author = True
        options.keep_date = True
        options.keep_prop = True

    ui.update_config(options)
    return real_main(args, parser)
1245
1246
if __name__ == "__main__":
    # Use main()'s return value as the exit status; a falsy result (e.g. None) means 0.
    exit_status = main() or 0
    sys.exit(exit_status)