]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/run/svn2svn.py
Verify-mode
[svn2svn.git] / svn2svn / run / svn2svn.py
1 """
2 Replicate (replay) changesets from one SVN repository to another.
3 """
4
5 from .. import base_version, full_version
6 from .. import ui
7 from .. import svnclient
8 from ..shell import run_svn,run_shell_command
9 from ..errors import (ExternalCommandFailed, UnsupportedSVNAction, InternalError, VerificationError)
10 from parse import HelpFormatter
11
12 import sys
13 import os
14 import time
15 import traceback
16 import shutil
17 import operator
18 import optparse
19 import re
20 from datetime import datetime
21
# The list of known SVN action abbr's, from "svn log"
_valid_svn_actions = "MARD"

# Module-level variables/parameters

# URL to source path in source SVN repo, e.g. 'http://server/svn/source/trunk'
source_url = ""
# URL to root of source SVN repo, e.g. 'http://server/svn/source'
source_repos_url = ""
# Relative path of source_url in source SVN repo, e.g. '/trunk'
source_base = ""
# UUID of source SVN repo
source_repos_uuid = ""
# URL to target path in target SVN repo, e.g. 'file:///svn/repo_target/trunk'
target_url = ""
# URL to root of target SVN repo, e.g. 'http://server/svn/target'
target_repos_url = ""
# Relative path of target_url in target SVN repo, e.g. '/trunk'
target_base = ""
# The running mapping-table dictionary for source_url rev #'s -> target_url rev #'s
rev_map = {}
# optparser options
options = None
34
def parse_svn_commit_rev(output):
    """
    Parse the revision number from the output of "svn commit".

    'output' is the captured text printed by "svn commit".
    Returns the revision number (int) taken from the first
    "Committed revision N." line.
    Raises AssertionError if the output contains no such line.
    """
    marker = 'Committed revision '
    for raw_line in output.strip("\n").split("\n"):
        # Drop trailing whitespace so a "\r\n" line ending doesn't leave a
        # stray "\r" behind the final "." and break the int() conversion.
        line = raw_line.rstrip()
        if line.startswith(marker):
            return int(line[len(marker):].rstrip('.'))
    # Raise explicitly (same exception type as the former "assert") so the
    # check is not silently dropped when running under "python -O".
    raise AssertionError("No 'Committed revision' line found in \"svn commit\" output")
47
def commit_from_svn_log_entry(log_entry, commit_paths=None, target_revprops=None):
    """
    Run "svn commit" in the current working copy, using the message, author
    and revprops of the given source SVN log entry.

    'log_entry' is the source log-entry dictionary; the keys read here are
      'date', 'message', 'author', 'revprops' and 'date_raw'.
    'commit_paths' is an optional list of changed paths to limit the commit to.
    'target_revprops' is an optional list of {'name':..., 'value':...} revprops
      to set in addition to the ones carried over from the source revision.

    Returns the committed revision number, or None when nothing was committed
    (dry-run mode, or "svn commit" produced no output).
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # Display the _wc_target "svn status" info if running in -vv (or higher) mode
    if ui.get_level() >= ui.EXTRA:
        ui.status(">> commit_from_svn_log_entry: Pre-commit _wc_target status:", level=ui.EXTRA, color='CYAN')
        ui.status(run_svn(["status"]), level=ui.EXTRA, color='CYAN')

    # fromtimestamp() uses the local timezone for displaying commit times.
    # If you prefer UTC commit times, use this instead: svn_date = "%d 0" % timestamp
    timestamp = int(log_entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))

    cmd = ["commit", "--force-log"]

    # Build the log message, optionally annotated with the source date/author
    message = log_entry['message']
    if options.log_date:
        message += "\nDate: " + svn_date
    if options.log_author:
        message += "\nAuthor: " + log_entry['author']
    if options.keep_author:
        cmd.extend(["--username", log_entry['author']])
    cmd.extend(["-m", message])

    # Merge the revprops: first the ones carried forward from the source
    # revision, then any extra ones for the target repo (which win on clashes).
    merged_revprops = {}
    for prop_list in (log_entry['revprops'], target_revprops):
        if prop_list:
            for prop in prop_list:
                merged_revprops[prop['name']] = prop['value']
    for name in merged_revprops:
        cmd.extend(["--with-revprop", "%s=%s" % (name, str(merged_revprops[name]))])

    # If we don't have an excessive amount of individual changed paths, pass
    # those to the "svn commit" command. Else, pass nothing so we commit at
    # the root of the working-copy.
    if commit_paths and len(commit_paths) < 100:
        cmd.extend(commit_paths)

    if options.dry_run:
        return None

    # Run the "svn commit" command, and screen-scrape the target_rev value (if any)
    output = run_svn(cmd)
    rev_num = parse_svn_commit_rev(output) if output else None
    if rev_num is None:
        return None
    ui.status("Committed revision %s.", rev_num)
    if options.keep_date:
        run_svn(["propset", "--revprop", "-r", rev_num, "svn:date", log_entry['date_raw']])
    return rev_num
99
def _svn_list_files(url, rev):
    """
    Return the file entries (directories excluded) printed by
    "svn list --recursive" for 'url' pegged at revision 'rev'.
    """
    output = run_svn(["list", "--recursive", "-r", rev, url+"@"+str(rev)])
    files = []
    for entry in output.strip("\n").split("\n"):
        # Skip blank lines. Directories have a trailing slash in the "svn list" output.
        if entry and entry[-1] != "/":
            files.append(entry)
    return files


def _svn_cat_md5(url, rev, strip_trailing_newline=False):
    """
    Return the output of piping "svn cat" for 'url' at revision 'rev' through
    md5sum, optionally stripping a trailing "\\r\\n" from the content first.
    """
    # Strip any trailing \r\n from file-content (http://stackoverflow.com/a/1656218/346778)
    strip_filter = " | perl -i -p0777we's/\\r\\n\\z//'" if strip_trailing_newline else ""
    return run_shell_command("svn cat -r %s '%s'%s | md5sum" % (rev, url+"@"+str(rev), strip_filter))


def verify_commit(source_rev, target_rev, log_entry=None):
    """
    Compare the ancestry/content/properties between source_url vs target_url
    for a given revision.

    'source_rev' is the revision in the source repo being verified.
    'target_rev' is the corresponding revision in the target repo.
    'log_entry' is the source "svn log" entry for source_rev; it is only used
      when options.verify == 1 ("changed only" mode).

    With options.verify == 1 only the paths changed in log_entry are checked;
    with options.verify == 2 every file under source_url is checked, and the
    file lists of source and target must match exactly.

    Raises VerificationError on any mismatch, and UnsupportedSVNAction if the
    source history contains an action outside _valid_svn_actions.
    """
    # Gather the offsets in the source repo to check
    check_paths = []
    remove_paths = []
    # TODO: Need to make this ancestry aware
    if options.verify == 1 and log_entry is not None:  # Changed only
        ui.status("Verifying source revision %s (only-changed)...", source_rev, level=ui.VERBOSE)
        for d in log_entry['changed_paths']:
            path = d['path']
            if not is_child_path(path, source_base):
                continue
            if d['kind'] == "":
                d['kind'] = svnclient.get_kind(source_repos_url, path, source_rev, d['action'], log_entry['changed_paths'])
            assert (d['kind'] == 'file') or (d['kind'] == 'dir')
            path_offset = path[len(source_base):].strip("/")
            if d['action'] == 'D':
                remove_paths.append(path_offset)
            elif path_offset not in check_paths:
                ui.status("verify_commit: path [mode=changed]: kind=%s: %s", d['kind'], path, level=ui.DEBUG, color='YELLOW')
                if d['kind'] == 'file':
                    ui.status(" "+"verify_commit [mode=changed]: check_paths.append('%s')", path_offset, level=ui.DEBUG, color='GREEN')
                    check_paths.append(path_offset)
                elif d['action'] in 'AR':
                    # Added/replaced directory: check every file underneath it
                    for child_path_offset in _svn_list_files(source_url.rstrip("/")+"/"+path_offset, source_rev):
                        working_path = (path_offset+"/" if path_offset else "") + child_path_offset
                        if working_path not in check_paths:
                            ui.status(" "+"verify_commit [mode=changed]: check_paths.append('%s'+'/'+'%s')", path_offset, child_path_offset, level=ui.DEBUG, color='GREEN')
                            check_paths.append(working_path)
    if options.verify == 2:  # All paths
        ui.status("Verifying source revision %s (all)...", source_rev, level=ui.VERBOSE)
        for child_path_offset in _svn_list_files(source_url, source_rev):
            # Only check files
            ui.status("verify_commit [mode=all]: check_paths.append('%s')", child_path_offset, level=ui.DEBUG, color='GREEN')
            check_paths.append(child_path_offset)

    # If there were any paths deleted in the last revision (options.verify=1 mode),
    # check that they were correctly deleted.
    if remove_paths:
        count_total = len(remove_paths)
        for count, path_offset in enumerate(remove_paths, 1):
            progress = str(count).rjust(len(str(count_total)))
            if in_svn(path_offset):
                ui.status(" (%s/%s) Verify path: FAIL: %s", progress, count_total, path_offset, level=ui.VERBOSE, color='RED')
                raise VerificationError("Path removed in source rev r%s, but still exists in target WC: %s" % (source_rev, path_offset))
            ui.status(" (%s/%s) Verify remove: OK: %s", progress, count_total, path_offset, level=ui.VERBOSE)

    # Compare each of the check_path entries between source vs. target
    if check_paths:
        # The first source_rev we replayed into target. Guard against an empty
        # rev_map, for which min() would raise ValueError.
        source_rev_first = (int(min(rev_map, key=rev_map.get)) if rev_map else 0) or 1
        ui.status("verify_commit: source_rev_first:%s", source_rev_first, level=ui.DEBUG, color='YELLOW')
        count_total = len(check_paths)
        for count, path_offset in enumerate(check_paths, 1):
            progress = str(count).rjust(len(str(count_total)))
            ui.status("verify_commit: path_offset:%s", path_offset, level=ui.DEBUG, color='YELLOW')
            source_log_entries = svnclient.run_svn_log(source_url.rstrip("/")+"/"+path_offset+"@"+str(source_rev), source_rev, 1, source_rev-source_rev_first+1)
            target_log_entries = svnclient.run_svn_log(target_url.rstrip("/")+"/"+path_offset+"@"+str(target_rev), target_rev, 1, target_rev)
            # Build a list of commits in source_log_entries which matches our
            # target path_offset.
            working_path = source_base+"/"+path_offset
            source_revs = []
            for src_entry in source_log_entries:
                source_rev_tmp = src_entry['revision']
                if source_rev_tmp < source_rev_first:
                    # Only process source revisions which have been replayed into target
                    break
                changed_paths_temp = []
                for d in src_entry['changed_paths']:
                    path = d['path']
                    # Match working_path or any parents
                    if is_child_path(working_path, path):
                        ui.status(" verify_commit: changed_path: %s %s@%s (parent:%s)", d['action'], path, source_rev_tmp, working_path, level=ui.DEBUG, color='YELLOW')
                        changed_paths_temp.append({'path': path, 'data': d})
                assert changed_paths_temp
                # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
                changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
                # Find the action for our working_path in this revision. Use a loop to check in reverse order,
                # so that if the target file/folder is "M" but has a parent folder with an "A" copy-from.
                working_path_next = working_path
                match_d = {}
                for v in changed_paths:
                    d = v['data']
                    if not match_d:
                        match_d = d
                    if d['action'] not in _valid_svn_actions:
                        raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                            % (src_entry['revision'], d['action']))
                    if d['action'] in 'AR' and d['copyfrom_revision']:
                        # If we found a copy-from action for a parent path, adjust our
                        # working_path to follow the rename/copy-from, just like find_svn_ancestors().
                        # d['path'] is a leading prefix of working_path (see is_child_path above),
                        # so swap only that prefix; str.replace() would also rewrite any later
                        # occurrence of the same text inside the path.
                        working_path_next = d['copyfrom_path'] + working_path[len(d['path']):]
                        match_d = d
                        break
                if is_child_path(working_path, source_base):
                    # Only add source_rev's where the path changed in this revision was a child
                    # of source_base, so that we silently ignore any history that happened on
                    # non-source_base paths (e.g. ignore branch history if we're only replaying trunk).
                    is_diff = False
                    d = match_d
                    if d['action'] == 'M':
                        # For action="M", we need to throw out cases where the only change was to
                        # a property which we ignore, e.g. "svn:mergeinfo".
                        if d['kind'] == "":
                            d['kind'] = svnclient.get_kind(source_repos_url, working_path, src_entry['revision'], d['action'], src_entry['changed_paths'])
                        assert (d['kind'] == 'file') or (d['kind'] == 'dir')
                        if d['kind'] == 'file':
                            # Check for file-content changes
                            # TODO: This should be made ancestor-aware, since the file won't always be at the same path in rev-1
                            sum1 = _svn_cat_md5(source_repos_url+working_path, source_rev_tmp)
                            sum2 = _svn_cat_md5(source_repos_url+working_path_next, source_rev_tmp-1)
                            is_diff = sum1 != sum2
                        if not is_diff:
                            # Check for property changes
                            props1 = svnclient.get_all_props(source_repos_url+working_path, source_rev_tmp)
                            props2 = svnclient.get_all_props(source_repos_url+working_path_next, source_rev_tmp-1)
                            # Ignore changes to "svn:mergeinfo", since we don't copy that
                            if 'svn:mergeinfo' in props1: del props1['svn:mergeinfo']
                            if 'svn:mergeinfo' in props2: del props2['svn:mergeinfo']
                            # Any added, removed or changed property counts as a difference
                            is_diff = props1 != props2
                        if not is_diff:
                            ui.status(" verify_commit: skip %s@%s", working_path, source_rev_tmp, level=ui.DEBUG, color='GREEN_B', bold=True)
                    else:
                        is_diff = True
                    if is_diff:
                        ui.status(" verify_commit: source_revs.append(%s), working_path:%s", source_rev_tmp, working_path, level=ui.DEBUG, color='GREEN_B')
                        source_revs.append({'path': working_path, 'revision': source_rev_tmp})
                working_path = working_path_next
            # Build a list of all the target commits "svn log" returned
            target_revs = []
            for tgt_entry in target_log_entries:
                target_rev_tmp = tgt_entry['revision']
                ui.status(" verify_commit: target_revs.append(%s)", target_rev_tmp, level=ui.DEBUG, color='GREEN_B')
                target_revs.append(target_rev_tmp)
            # Target revs not (yet) accounted for by a source rev
            target_revs_rmndr = list(target_revs)
            # Compare the two lists
            for d in source_revs:
                working_path = d['path']
                source_rev_tmp = d['revision']
                target_rev_tmp = get_rev_map(source_rev_tmp, " ")
                working_offset = working_path[len(source_base):].strip("/")
                ui.status(" verify_commit: %s: source=%s target=%s", working_offset, source_rev_tmp, target_rev_tmp, level=ui.DEBUG, color='GREEN')
                if not target_rev_tmp:
                    ui.status(" (%s/%s) Verify path: FAIL: %s", progress, count_total, path_offset, level=ui.VERBOSE, color='RED')
                    raise VerificationError("Unable to find corresponding target_rev for source_rev r%s in rev_map (path_offset='%s')" % (source_rev_tmp, path_offset))
                if target_rev_tmp not in target_revs:
                    # If found a source_rev with no equivalent target_rev in target_revs,
                    # check if the only difference in source_rev vs. source_rev-1 is the
                    # removal/addition of a trailing newline char, since this seems to get
                    # stripped-out sometimes during the replay (via "svn export"?).
                    sum1 = _svn_cat_md5(source_repos_url+working_path, source_rev_tmp, strip_trailing_newline=True)
                    sum2 = _svn_cat_md5(source_repos_url+working_path, source_rev_tmp-1, strip_trailing_newline=True)
                    if sum1 != sum2:
                        ui.status(" (%s/%s) Verify path: FAIL: %s", progress, count_total, path_offset, level=ui.VERBOSE, color='RED')
                        raise VerificationError("Found source_rev (r%s) with no corresponding target_rev: path_offset='%s'" % (source_rev_tmp, path_offset))
                # Only remove revs which are still pending: list.remove() raises
                # ValueError for a rev that is absent (the tolerated newline-only case
                # above) or that an earlier source rev already consumed.
                if target_rev_tmp in target_revs_rmndr:
                    target_revs_rmndr.remove(target_rev_tmp)
            if target_revs_rmndr:
                rmndr_list = ", ".join(map(str, target_revs_rmndr))
                ui.status(" (%s/%s) Verify path: FAIL: %s", progress, count_total, path_offset, level=ui.VERBOSE, color='RED')
                raise VerificationError("Found one or more *extra* target_revs: path_offset='%s', target_revs='%s'" % (path_offset, rmndr_list))
            ui.status(" (%s/%s) Verify path: OK: %s", progress, count_total, path_offset, level=ui.VERBOSE)

    # Ensure there are no "extra" files in the target side
    if options.verify == 2:
        target_paths = _svn_list_files(target_url, target_rev)
        # Sets make the membership tests cheap; iteration order (and thus which
        # mismatch gets reported first) still follows the original lists.
        check_set = set(check_paths)
        target_set = set(target_paths)
        for path_offset in target_paths:
            if path_offset not in check_set:
                raise VerificationError("Path exists in target (@%s) but not source (@%s): %s" % (target_rev, source_rev, path_offset))
        for path_offset in check_paths:
            if path_offset not in target_set:
                raise VerificationError("Path exists in source (@%s) but not target (@%s): %s" % (source_rev, target_rev, path_offset))
324
def full_svn_revert():
    """
    Do an "svn revert" and proactively remove any extra files in the working copy.

    Runs "svn revert --recursive ." in the current directory, then deletes
    every path which "svn status" still reports as unversioned ("?").
    """
    run_svn(["revert", "--recursive", "."])
    output = run_svn(["status"])
    if not output:
        return
    for line in output.strip("\n").split("\n"):
        # startswith() (rather than line[0]) so that a blank line in the
        # "svn status" output doesn't raise IndexError.
        if not line.startswith("?"):
            continue
        path = line[4:].strip(" ")
        if os.path.islink(path) or os.path.isfile(path):
            # Remove symlinks via os.remove(): shutil.rmtree() refuses to
            # operate on a symlink, even one pointing at a directory.
            os.remove(path)
        elif os.path.isdir(path):
            shutil.rmtree(path)
340
def gen_tracking_revprops(source_rev):
    """
    Return the list of svn2svn-specific source-tracking revprops for
    'source_rev', each as a {'name': ..., 'value': ...} dictionary.

    The UUID and URL values come from the module-level source_repos_uuid and
    source_url.
    """
    tracking = (
        ('svn2svn:source_uuid', source_repos_uuid),
        ('svn2svn:source_url', source_url),
        ('svn2svn:source_rev', source_rev),
    )
    return [{'name': name, 'value': value} for name, value in tracking]
349
def sync_svn_props(source_url, source_rev, path_offset):
    """
    Make the SVN properties of 'path_offset' in the target WC match those of
    the same offset under 'source_url' at 'source_rev'.

    "svn:mergeinfo" from the source is never carried forward.
    """
    wanted = svnclient.get_all_props(join_path(source_url, path_offset), source_rev)
    current = svnclient.get_all_props(path_offset)
    # Never carry-forward "svn:mergeinfo"
    if 'svn:mergeinfo' in wanted:
        del wanted['svn:mergeinfo']
    # Remove any properties which exist in target but not source
    stale = [name for name in current if name not in wanted]
    for name in stale:
        run_svn(["propdel", name, path_offset])
    # Set/update any properties which exist in source but not target or
    # whose value differs between source vs. target.
    for name in wanted:
        if name in current and wanted[name] == current[name]:
            continue
        run_svn(["propset", name, wanted[name], path_offset])
370
def in_svn(p, require_in_repo=False, prefix=""):
    """
    Tell whether the file/folder 'p' is being tracked by Subversion.

    The answer is based on the first "svn status" entry for 'p' (queried
    non-recursively), rather than on the presence of ".svn" directories: with
    SVN 1.7 and beyond (WC-NG) there is only a single top-level ".svn" at the
    root of the working-copy.

    'require_in_repo' makes locally-added paths (status 'added', or no
      revision) count as not tracked.
    'prefix' is prepended to the debug status message.
    """
    entries = svnclient.get_svn_status(p, no_recursive=True)
    if not entries:
        return False
    entry = entries[0]
    if require_in_repo and (entry['status'] == 'added' or entry['revision'] is None):
        # If caller requires this path to be in the SVN repo, prevent returning True
        # for paths that are only locally-added.
        tracked = False
    elif entry['status'] == 'deleted':
        # Don't consider files tracked as deleted in the WC as under source-control.
        tracked = False
    else:
        # Consider files which are locally added/copied as under source-control.
        tracked = (entry['type'] == 'normal'
                   or entry['status'] == 'added'
                   or entry['copied'] == 'true')
    ui.status(prefix + ">> in_svn('%s', require_in_repo=%s) --> %s", p, str(require_in_repo), str(tracked), level=ui.DEBUG, color='GREEN')
    return tracked
392
def is_child_path(path, p_path):
    """
    Return True if 'path' is the same as 'p_path' or lies underneath it
    (i.e. starts with p_path followed by a "/"), else False.
    """
    if path == p_path:
        return True
    return path.startswith(p_path + "/")
395
def join_path(base, child):
    """
    Join 'child' onto 'base' with exactly one "/" separator.

    Returns 'base' unchanged when 'child' is empty.
    """
    if not child:
        return base
    # str.rstrip() returns a new string; its result must be used, otherwise a
    # base with a trailing slash yields a double "//" in the joined path.
    return base.rstrip('/') + "/" + child
399
def find_svn_ancestors(svn_repos_url, start_path, start_rev, stop_base_path=None, prefix=""):
    """
    Given an initial starting path+rev, walk the SVN history backwards to inspect the
    ancestry of that path, optionally seeing if it traces back to stop_base_path.

    Build an array of copyfrom_path and copyfrom_revision pairs for each of the "svn copy"'s.
    If we find a copyfrom_path which is stop_base_path or a child of it (e.g. we crawled
    back to the initial branch-copy from trunk), then return the collection of ancestor
    paths. Otherwise, copyfrom_path has no ancestry compared to stop_base_path.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'svn_repos_url' is the full URL to the root of the SVN repository,
      e.g. 'file:///path/to/repo'
    'start_path' is the path in the SVN repo to the source path to start checking
      ancestry at, e.g. '/branches/fix1/projectA/file1.txt'.
    'start_rev' is the revision to start walking the history of start_path backwards from.
    'stop_base_path' is the path in the SVN repo to stop tracing ancestry once we've reached,
      i.e. the target path we're trying to trace ancestry back to, e.g. '/trunk'.
    'prefix' is prepended to every debug status message.

    Returns a list of dictionaries with the keys 'path', 'revision', 'copyfrom_path' and
    'copyfrom_rev', newest copy first. The list is empty when no ancestry was found.
    Raises UnsupportedSVNAction for an action outside _valid_svn_actions.
    """
    ui.status(prefix + ">> find_svn_ancestors: Start: (%s) start_path: %s stop_base_path: %s",
        svn_repos_url, start_path+"@"+str(start_rev), stop_base_path, level=ui.DEBUG, color='YELLOW')
    done = False
    no_ancestry = False
    cur_path = start_path
    cur_rev = start_rev
    first_iter_done = False
    ancestors = []
    while not done:
        # Get the first "svn log" entry for cur_path (relative to @cur_rev)
        ui.status(prefix + ">> find_svn_ancestors: %s", svn_repos_url+cur_path+"@"+str(cur_rev), level=ui.DEBUG, color='YELLOW')
        log_entry = svnclient.get_first_svn_log_entry(svn_repos_url+cur_path, 1, cur_rev)
        if not log_entry:
            ui.status(prefix + ">> find_svn_ancestors: Done: no log_entry", level=ui.DEBUG, color='YELLOW')
            break
        # If we found a copy-from case which matches our stop_base_path, we're done.
        # ...but only if we've at least tried to search for the first copy-from path.
        if stop_base_path is not None and first_iter_done and is_child_path(cur_path, stop_base_path):
            ui.status(prefix + ">> find_svn_ancestors: Done: Found is_child_path(cur_path, stop_base_path) and first_iter_done=True", level=ui.DEBUG, color='YELLOW')
            break
        first_iter_done = True
        # Search for any actions on our target path (or parent paths).
        changed_paths_temp = []
        for d in log_entry['changed_paths']:
            path = d['path']
            if is_child_path(cur_path, path):
                changed_paths_temp.append({'path': path, 'data': d})
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the ancestry-chain.
            ui.status(prefix + ">> find_svn_ancestors: Done: No matching changed_paths", level=ui.DEBUG, color='YELLOW')
            break
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
        # Find the action for our cur_path in this revision. Use a loop to check in reverse order,
        # so that if the target file/folder is "M" but has a parent folder with an "A" copy-from
        # then we still correctly match the deepest copy-from.
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            action = d['action']
            if action not in _valid_svn_actions:
                raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                    % (log_entry['revision'], action))
            ui.status(prefix + "> %s %s%s", action, path,
                (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                level=ui.DEBUG, color='YELLOW')
            if action == 'D':
                # If file/folder was deleted, ancestry-chain stops here
                if stop_base_path:
                    no_ancestry = True
                ui.status(prefix + ">> find_svn_ancestors: Done: deleted", level=ui.DEBUG, color='YELLOW')
                done = True
                break
            if action in 'RA':
                # If file/folder was added/replaced but not a copy, ancestry-chain stops here
                if not d['copyfrom_path']:
                    if stop_base_path:
                        no_ancestry = True
                    ui.status(prefix + ">> find_svn_ancestors: Done: %s with no copyfrom_path",
                        "Added" if action == "A" else "Replaced",
                        level=ui.DEBUG, color='YELLOW')
                    done = True
                    break
                # Else, file/folder was added/replaced and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                ui.status(prefix + ">> find_svn_ancestors: Found copy-from (action=%s): %s --> %s",
                    action, path, d['copyfrom_path']+"@"+str(d['copyfrom_revision']),
                    level=ui.DEBUG, color='YELLOW')
                # d['path'] is a leading prefix of cur_path (see is_child_path above), so swap
                # only that prefix. str.replace() would also rewrite any later occurrence
                # of the same text further down the path.
                copyfrom_full_path = d['copyfrom_path'] + cur_path[len(d['path']):]
                ancestors.append({'path': cur_path, 'revision': log_entry['revision'],
                                  'copyfrom_path': copyfrom_full_path, 'copyfrom_rev': d['copyfrom_revision']})
                cur_path = copyfrom_full_path
                cur_rev = d['copyfrom_revision']
                # Follow the copy and keep on searching
                break
        else:
            # None of the matching changes was a delete/add/replace, so cur_path
            # and cur_rev are unchanged; stop here rather than looping forever
            # on the same log entry.
            done = True
    if stop_base_path and no_ancestry:
        # If we're tracing back ancestry to a specific target stop_base_path and
        # the ancestry-chain stopped before we reached stop_base_path, then return
        # nothing since there is no ancestry chaining back to that target.
        ancestors = []
    if ancestors:
        if ui.get_level() >= ui.DEBUG:
            max_len = max(len(d['path']+"@"+str(d['revision'])) for d in ancestors)
            ui.status(prefix + ">> find_svn_ancestors: Found parent ancestors:", level=ui.DEBUG, color='YELLOW_B')
            for idx, d in enumerate(ancestors):
                ui.status(prefix + " [%s] %s --> %s", idx,
                    str(d['path']+"@"+str(d['revision'])).ljust(max_len),
                    str(d['copyfrom_path']+"@"+str(d['copyfrom_rev'])),
                    level=ui.DEBUG, color='YELLOW')
    else:
        ui.status(prefix + ">> find_svn_ancestors: No ancestor-chain found: %s",
            svn_repos_url+start_path+"@"+str(start_rev), level=ui.DEBUG, color='YELLOW')
    return ancestors
521
def get_rev_map(source_rev, prefix):
    """
    Look up the target-repo rev # which corresponds to 'source_rev'.

    Walks downwards from source_rev to 1 and returns the rev_map value (int)
    of the highest mapped source rev less-than-or-equal-to source_rev, or None
    if there is no such entry. 'prefix' is prepended to the debug messages.
    """
    ui.status(prefix + ">> get_rev_map(%s)", source_rev, level=ui.DEBUG, color='GREEN')
    rev = int(source_rev)
    while rev > 0:
        found = rev in rev_map
        ui.status(prefix + ">> get_rev_map: rev=%s in_rev_map=%s", rev, str(found), level=ui.DEBUG, color='BLACK_B')
        if found:
            return int(rev_map[rev])
        rev -= 1
    # We fell off the bottom of the rev_map. Ruh-roh...
    return None
535
def set_rev_map(source_rev, target_rev):
    """
    Record in the module-level rev_map that 'source_rev' (source repo) was
    replayed as 'target_rev' (target repo). Both are stored as ints.
    """
    global rev_map
    # Convert the value before the key, matching the evaluation order of a
    # plain "rev_map[int(k)] = int(v)" assignment.
    mapped_rev = int(target_rev)
    key_rev = int(source_rev)
    rev_map[key_rev] = mapped_rev
540
def build_rev_map(target_url, target_end_rev, source_info):
    """
    Check for any already-replayed history from source_url (source_info) and
    build the mapping-table of source_rev -> target_rev.

    'target_url' is the target URL whose log (revisions 1..target_end_rev) is scanned.
    'target_end_rev' is the last target revision to inspect.
    'source_info' is a dictionary; its 'repos_uuid' and 'url' values are matched
      against the svn2svn:source_uuid / svn2svn:source_url revprops.

    Resets the module-level rev_map and refills it via set_rev_map().
    """
    global rev_map
    rev_map = {}
    ui.status("Rebuilding target_rev -> source_rev rev_map...", level=ui.VERBOSE)
    tracking_names = ('svn2svn:source_uuid', 'svn2svn:source_url', 'svn2svn:source_rev')
    proc_count = 0
    it_log_entries = svnclient.iter_svn_log_entries(target_url, 1, target_end_rev, get_changed_paths=False, get_revprops=True)
    for log_entry in it_log_entries:
        # Bind this on every iteration, so the progress message below never
        # reads an unbound or stale value.
        target_rev = log_entry['revision']
        if log_entry['revprops']:
            revprops = {}
            for v in log_entry['revprops']:
                if v['name'].startswith('svn2svn:'):
                    revprops[v['name']] = v['value']
            # Require all three tracking revprops, so that a revision carrying
            # only some "svn2svn:*" revprops is skipped instead of raising KeyError.
            if all(name in revprops for name in tracking_names) and \
               revprops['svn2svn:source_uuid'] == source_info['repos_uuid'] and \
               revprops['svn2svn:source_url'] == source_info['url']:
                set_rev_map(revprops['svn2svn:source_rev'], target_rev)
        proc_count += 1
        if proc_count % 500 == 0:
            ui.status("...processed %s (%s of %s)..." % (proc_count, target_rev, target_end_rev), level=ui.VERBOSE)
566
def get_svn_dirlist(svn_path, rev_number = ""):
    """
    Return the lines printed by "svn list" for the given folder path.

    Note that "svn list" is run without "--recursive" here. When 'rev_number'
    is given, it is used both as the "-r" revision and as the peg revision.
    Returns an empty list when the command output is empty.
    """
    target = svn_path
    cmd = ["list"]
    if rev_number:
        cmd.extend(["-r", rev_number])
        target = target + "@" + str(rev_number)
    cmd.append(target)
    listing = run_svn(cmd, no_fail=True)
    if len(listing) > 1:
        return listing.strip("\n").split("\n")
    return []
580
def path_in_list(paths, path):
    """
    Return True if 'path' equals, or is a child of, any entry of 'paths'.
    """
    return any(is_child_path(path, candidate) for candidate in paths)
586
def add_path(paths, path):
    """
    Append 'path' to the list 'paths' (in place), unless it, or one of its
    parent paths, is already in the list.
    """
    if path_in_list(paths, path):
        return
    paths.append(path)
590
def in_ancestors(ancestors, ancestor):
    """
    Check 'ancestor' against the 'ancestors' list.

    Scans 'ancestors' from the last entry back to index 1 (index 0 is never
    inspected) for the first entry whose revision is greater than
    ancestor['revision'], and returns whether ancestor['path'] is that entry's
    path or a child of it. Returns True when no such entry exists.
    """
    for entry in reversed(ancestors[1:]):
        if int(entry['revision']) > ancestor['revision']:
            return is_child_path(ancestor['path'], entry['path'])
    return True
598
599 def do_svn_add(source_url, path_offset, source_rev, source_ancestors, \
600 parent_copyfrom_path="", parent_copyfrom_rev="", \
601 export_paths={}, is_dir = False, skip_paths=[], prefix = ""):
602 """
603 Given the add'd source path, replay the "svn add/copy" commands to correctly
604 track renames across copy-from's.
605
606 For example, consider a sequence of events like this:
607 1. svn copy /trunk /branches/fix1
608 2. (Make some changes on /branches/fix1)
609 3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2 " Rename folder
610 4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt " Rename file inside renamed folder
611 5. svn co /trunk && svn merge /branches/fix1
612 After the merge and commit, "svn log -v" with show a delete of /trunk/Proj1
613 and and add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
614 to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
615 we'd lose the logical history that Proj2/file2.txt is really a descendant
616 of Proj1/file1.txt.
617
618 'path_offset' is the offset from source_base to the file to check ancestry for,
619 e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
620 'source_rev' is the revision ("svn log") that we're processing from the source repo.
621 'parent_copyfrom_path' and 'parent_copyfrom_rev' is the copy-from path of the parent
622 directory, when being called recursively by do_svn_add_dir().
623 'export_paths' is the list of path_offset's that we've deferred running "svn export" on.
624 'is_dir' is whether path_offset is a directory (rather than a file).
625 """
626 source_base = source_url[len(source_repos_url):] # e.g. '/trunk'
627 ui.status(prefix + ">> do_svn_add: %s %s", join_path(source_base, path_offset)+"@"+str(source_rev),
628 " (parent-copyfrom: "+parent_copyfrom_path+"@"+str(parent_copyfrom_rev)+")" if parent_copyfrom_path else "",
629 level=ui.DEBUG, color='GREEN')
630 # Check if the given path has ancestors which chain back to the current source_base
631 found_ancestor = False
632 ancestors = find_svn_ancestors(source_repos_url, join_path(source_base, path_offset), source_rev, stop_base_path=source_base, prefix=prefix+" ")
633 ancestor = ancestors[len(ancestors)-1] if ancestors else None # Choose the eldest ancestor, i.e. where we reached stop_base_path=source_base
634 if ancestor and not in_ancestors(source_ancestors, ancestor):
635 ancestor = None
636 copyfrom_path = ancestor['copyfrom_path'] if ancestor else ""
637 copyfrom_rev = ancestor['copyfrom_rev'] if ancestor else ""
638 if ancestor:
639 # The copy-from path has ancestry back to source_url.
640 ui.status(prefix + ">> do_svn_add: Check copy-from: Found parent: %s", copyfrom_path+"@"+str(copyfrom_rev),
641 level=ui.DEBUG, color='GREEN', bold=True)
642 found_ancestor = True
643 # Map the copyfrom_rev (source repo) to the equivalent target repo rev #. This can
644 # return None in the case where copyfrom_rev is *before* our source_start_rev.
645 tgt_rev = get_rev_map(copyfrom_rev, prefix+" ")
646 ui.status(prefix + ">> do_svn_add: get_rev_map: %s (source) -> %s (target)", copyfrom_rev, tgt_rev, level=ui.DEBUG, color='GREEN')
647 else:
648 ui.status(prefix + ">> do_svn_add: Check copy-from: No ancestor chain found.", level=ui.DEBUG, color='GREEN')
649 found_ancestor = False
650 if found_ancestor and tgt_rev:
651 # Check if this path_offset in the target WC already has this ancestry, in which
652 # case there's no need to run the "svn copy" (again).
653 path_in_svn = in_svn(path_offset, prefix=prefix+" ")
654 log_entry = svnclient.get_last_svn_log_entry(path_offset, 1, 'HEAD', get_changed_paths=False) if in_svn(path_offset, require_in_repo=True, prefix=prefix+" ") else []
655 if (not log_entry or (log_entry['revision'] != tgt_rev)):
656 copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
657 ui.status(prefix + ">> do_svn_add: svn_copy: Copy-from: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
658 ui.status(prefix + " copyfrom: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
659 ui.status(prefix + " p_copyfrom: %s", parent_copyfrom_path+"@"+str(parent_copyfrom_rev) if parent_copyfrom_path else "", level=ui.DEBUG, color='GREEN')
660 if path_in_svn and \
661 ((parent_copyfrom_path and is_child_path(copyfrom_path, parent_copyfrom_path)) and \
662 (parent_copyfrom_rev and copyfrom_rev == parent_copyfrom_rev)):
663 # When being called recursively, if this child entry has the same ancestor as the
664 # the parent, then no need to try to run another "svn copy".
665 ui.status(prefix + ">> do_svn_add: svn_copy: Same ancestry as parent: %s",
666 parent_copyfrom_path+"@"+str(parent_copyfrom_rev),level=ui.DEBUG, color='GREEN')
667 pass
668 else:
669 # Copy this path from the equivalent path+rev in the target repo, to create the
670 # equivalent history.
671 if parent_copyfrom_path:
672 # If we have a parent copy-from path, we mis-match that so display a status
673 # message describing the action we're mimic'ing. If path_in_svn, then this
674 # is logically a "replace" rather than an "add".
675 ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), join_path(source_base, path_offset), ancestors[0]['copyfrom_path']+"@"+str(copyfrom_rev), level=ui.VERBOSE)
676 if path_in_svn:
677 # If local file is already under version-control, then this is a replace.
678 ui.status(prefix + ">> do_svn_add: pre-copy: local path already exists: %s", path_offset, level=ui.DEBUG, color='GREEN')
679 run_svn(["update", path_offset])
680 run_svn(["remove", "--force", path_offset])
681 run_svn(["copy", "-r", tgt_rev, join_path(target_url, copyfrom_offset)+"@"+str(tgt_rev), path_offset])
682 if is_dir:
683 # Export the final verison of all files in this folder.
684 add_path(export_paths, path_offset)
685 else:
686 # Export the final verison of this file.
687 run_svn(["export", "--force", "-r", source_rev,
688 source_repos_url+join_path(source_base, path_offset)+"@"+str(source_rev), path_offset])
689 if options.keep_prop:
690 sync_svn_props(source_url, source_rev, path_offset)
691 else:
692 ui.status(prefix + ">> do_svn_add: Skipped 'svn copy': %s", path_offset, level=ui.DEBUG, color='GREEN')
693 else:
694 # Else, either this copy-from path has no ancestry back to source_url OR copyfrom_rev comes
695 # before our initial source_start_rev (i.e. tgt_rev == None), so can't do a "svn copy".
696 # Create (parent) directory if needed.
697 # TODO: This is (nearly) a duplicate of code in process_svn_log_entry(). Should this be
698 # split-out to a shared tag?
699 p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or None
700 if p_path and not os.path.exists(p_path):
701 run_svn(["mkdir", p_path])
702 if not in_svn(path_offset, prefix=prefix+" "):
703 if is_dir:
704 # Export the final verison of all files in this folder.
705 add_path(export_paths, path_offset)
706 else:
707 # Export the final verison of this file. We *need* to do this before running
708 # the "svn add", even if we end-up re-exporting this file again via export_paths.
709 run_svn(["export", "--force", "-r", source_rev,
710 source_repos_url+join_path(source_base, path_offset)+"@"+str(source_rev), path_offset])
711 # If not already under version-control, then "svn add" this file/folder.
712 run_svn(["add", "--parents", path_offset])
713 if options.keep_prop:
714 sync_svn_props(source_url, source_rev, path_offset)
715 if is_dir:
716 # For any folders that we process, process any child contents, so that we correctly
717 # replay copies/replaces/etc.
718 do_svn_add_dir(source_url, path_offset, source_rev, source_ancestors,
719 copyfrom_path, copyfrom_rev, export_paths, skip_paths, prefix+" ")
720
def do_svn_add_dir(source_url, path_offset, source_rev, source_ancestors, \
                   parent_copyfrom_path, parent_copyfrom_rev, \
                   export_paths, skip_paths, prefix=""):
    """
    Replay the child contents of a directory into the working copy.

    Lists 'path_offset' in the local working copy and in source_url at
    'source_rev', calls do_svn_add() for every remote entry that is not in
    'skip_paths', then runs "svn remove" on every entry which is listed
    locally but not remotely.

    'source_url', 'source_rev', 'source_ancestors', 'export_paths' and
    'skip_paths' are passed through unchanged to do_svn_add().
    'parent_copyfrom_path' and 'parent_copyfrom_rev' are the copy-from
    path/rev of the directory being processed, also passed to do_svn_add().
    'prefix' is prepended to debug status messages.
    """
    source_base = source_url[len(source_repos_url):]  # e.g. '/trunk'

    def to_working_path(entry, entry_is_dir):
        # Directory entries carry a trailing slash in the listing; drop it
        # before joining onto path_offset.
        name = entry.rstrip('/') if entry_is_dir else entry
        return join_path(path_offset, name).lstrip('/')

    # Get the directory contents, to compare between the local WC (target_url) vs. the remote repo (source_url)
    # TODO: paths_local won't include add'd paths because "svn ls" lists the contents of the
    #       associated remote repo folder. (Is this a problem?)
    paths_local = get_svn_dirlist(path_offset)
    paths_remote = get_svn_dirlist(join_path(source_url, path_offset), source_rev)
    ui.status(prefix + ">> do_svn_add_dir: paths_local: %s", str(paths_local), level=ui.DEBUG, color='GREEN')
    ui.status(prefix + ">> do_svn_add_dir: paths_remote: %s", str(paths_remote), level=ui.DEBUG, color='GREEN')
    # Build the lookup set once, rather than scanning the list for every local entry.
    remote_entries = set(paths_remote)

    # Update files/folders which exist in remote but not local
    for path in paths_remote:
        path_is_dir = path[-1] == "/"
        working_path = to_working_path(path, path_is_dir)
        if working_path in skip_paths:
            # The caller handles this path separately.
            continue
        do_svn_add(source_url, working_path, source_rev, source_ancestors,
                   parent_copyfrom_path, parent_copyfrom_rev,
                   export_paths, path_is_dir, skip_paths, prefix+" ")

    # Remove files/folders which exist in local but not remote
    for path in paths_local:
        if path in remote_entries:
            continue
        path_is_dir = path[-1] == "/"
        working_path = to_working_path(path, path_is_dir)
        ui.status(" %s %s", 'D', join_path(source_base, working_path), level=ui.VERBOSE)
        run_svn(["update", working_path])
        run_svn(["remove", "--force", working_path])
        # TODO: Does this handle deleted folders too? Wouldn't want to have a case
        #       where we only delete all files from folder but leave orphaned folder around.
751
def process_svn_log_entry(log_entry, ancestors, commit_paths, prefix = ""):
    """
    Process SVN changes from the given log entry. Build an array (commit_paths)
    of the paths in the working-copy that were changed, i.e. the paths which
    we'll pass to "svn commit".

    'log_entry' is the source log entry; its 'revision', 'url' and
    'changed_paths' values are read here.
    'ancestors' is passed through to do_svn_add() for copy-from'd adds.
    'commit_paths' is appended to in place, one path offset per changed path
    under the source base path.
    'prefix' is prepended to debug status messages.

    Raises UnsupportedSVNAction for an action not in _valid_svn_actions and
    InternalError for a valid action which has no handler below.
    """
    export_paths = []
    source_rev = log_entry['revision']
    source_url = log_entry['url']
    source_base = source_url[len(source_repos_url):]  # e.g. '/trunk'

    def export_from_source(offset, extra_args=()):
        # "svn export" the given offset from source_url@source_rev on top of
        # the same offset in the working copy.
        run_svn(["export", "--force"] + list(extra_args) + ["-r", source_rev,
                 join_path(source_url, offset)+"@"+str(source_rev), offset])

    ui.status(prefix + ">> process_svn_log_entry: %s", source_url+"@"+str(source_rev), level=ui.DEBUG, color='GREEN')
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not is_child_path(path, source_base):
            # Ignore changed files that are not part of this subdir
            ui.status(prefix + ">> process_svn_log_entry: Unrelated path: %s (base: %s)", path, source_base, level=ui.DEBUG, color='GREEN')
            continue
        if d['kind'] in ("", 'none'):
            # The "kind" value was introduced in SVN 1.6, and "svn log --xml" won't return a "kind"
            # value for commits made on a pre-1.6 repo, even if the server is now running 1.6.
            # We need to use other methods to fetch the node-kind for these cases.
            d['kind'] = svnclient.get_kind(source_repos_url, path, source_rev, d['action'], log_entry['changed_paths'])
        assert (d['kind'] == 'file') or (d['kind'] == 'dir')
        path_is_dir = d['kind'] == 'dir'
        path_is_file = d['kind'] == 'file'
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path
        action = d['action']
        if action not in _valid_svn_actions:
            raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                % (source_rev, action))
        ui.status(" %s %s%s", action, d['path'],
            (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
            level=ui.VERBOSE)

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        commit_paths.append(path_offset)

        # Special-handling for replace's
        if action == 'R':
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            if path_offset and in_svn(path_offset):
                # Target path might not be under version-control yet, e.g. parent "add"
                # was a copy-from a branch which had no ancestry back to trunk, and each
                # child folder under that parent folder is a "replace" action on the final
                # merge to trunk. Since the child folders will be in skip_paths, do_svn_add
                # wouldn't have created them while processing the parent "add" path.
                if path_is_dir:
                    # Need to "svn update" before "svn remove" in case child contents are at
                    # a higher rev than the (parent) path_offset.
                    run_svn(["update", path_offset])
                run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            # Handle cases where this "add" was a copy from another URL in the source repo
            if d['copyfrom_revision']:
                # Build list of child entries which are also in the changed_paths list,
                # so that do_svn_add() can skip processing these entries when recursing
                # since we'll end-up processing them later. Don't include action="M" paths
                # in this list because it's non-conclusive: it could just mean that the
                # file was modified *after* the copy-from, so we still want do_svn_add()
                # to re-create the correct ancestry.
                skip_paths = [tmp_d['path'][len(source_base):].strip("/")
                              for tmp_d in log_entry['changed_paths']
                              if is_child_path(tmp_d['path'], path) and tmp_d['action'] in 'ARD']
                do_svn_add(source_url, path_offset, source_rev, ancestors, "", "", export_paths, path_is_dir, skip_paths, prefix+" ")
            # Else just "svn export" the files from the source repo and "svn add" them.
            else:
                # Create (parent) directory if needed
                p_path = path_offset if path_is_dir else os.path.dirname(path_offset).strip() or None
                if p_path and not os.path.exists(p_path):
                    run_svn(["mkdir", p_path])
                # Export the entire added tree.
                if path_is_dir:
                    # For directories, defer the (recursive) "svn export". Might have a
                    # situation in a branch merge where the entry in the svn-log is a
                    # non-copy-from'd "add" but there are child contents (that we haven't
                    # gotten to yet in log_entry) that are copy-from's. When we try do
                    # the "svn copy" later on in do_svn_add() for those copy-from'd paths,
                    # having pre-existing (svn-add'd) contents creates some trouble.
                    # Instead, just create the stub folders ("svn mkdir" above) and defer
                    # exporting the final file-state until the end.
                    add_path(export_paths, path_offset)
                else:
                    # Export the final version of this file. We *need* to do this before running
                    # the "svn add", even if we end-up re-exporting this file again via export_paths.
                    export_from_source(path_offset)
                if not in_svn(path_offset, prefix=prefix+" "):
                    # Need to use in_svn here to handle cases where client committed the parent
                    # folder and each indiv sub-folder.
                    run_svn(["add", "--parents", path_offset])
                if options.keep_prop:
                    sync_svn_props(source_url, source_rev, path_offset)

        elif action == 'D':
            if path_is_dir:
                # For dirs, need to "svn update" before "svn remove" because the final
                # "svn commit" will fail if the parent (path_offset) is at a lower rev
                # than any of the child contents. This needs to be a recursive update.
                run_svn(["update", path_offset])
            run_svn(["remove", "--force", path_offset])

        elif action == 'M':
            if path_is_file:
                export_from_source(path_offset, ["-N"])
            if path_is_dir:
                # For dirs, need to "svn update" before export/prop-sync because the
                # final "svn commit" will fail if the parent is at a lower rev than
                # child contents. Just need to update the rev-state of the dir (d['path']),
                # don't need to recursively update all child contents.
                # (??? is this the right reason?)
                run_svn(["update", "-N", path_offset])
            if options.keep_prop:
                sync_svn_props(source_url, source_rev, path_offset)

        else:
            raise InternalError("Internal Error: process_svn_log_entry: Unhandled 'action' value: '%s'"
                % action)

    # Export the final version of all add'd paths from source_url
    for deferred_offset in export_paths:
        export_from_source(deferred_offset)
898
def keep_revnum(source_rev, target_rev_last, wc_target_tmp):
    """
    Add "padding" target revisions as needed to keep source and target
    revision #'s identical.

    'source_rev' is the source revision about to be replayed.
    'target_rev_last' is the last revision # in the target repo.
    'wc_target_tmp' is the path used for a temporary, empty-depth checkout of
    the target repo root. Any existing directory at that path is deleted, and
    the checkout is removed again before returning, even on failure.

    Returns the last target revision #: 'target_rev_last' unchanged when no
    padding was needed, else the number of the last padding commit.

    Raises InternalError if source_rev is not greater than target_rev_last,
    or if a padding commit lands on an unexpected revision number.
    """
    source_num = int(source_rev)
    last_num = int(target_rev_last)
    if source_num <= last_num:
        raise InternalError("keep-revnum mode is enabled, "
            "but source revision (r%s) is less-than-or-equal last target revision (r%s)" % \
            (source_rev, target_rev_last))
    if last_num >= source_num - 1:
        # The next target commit will already land on source_rev.
        return target_rev_last

    # Add "padding" target revisions to keep source and target rev #'s identical
    if os.path.exists(wc_target_tmp):
        shutil.rmtree(wc_target_tmp)
    try:
        run_svn(["checkout", "-r", "HEAD", "--depth=empty", target_repos_url, wc_target_tmp])
        for rev_num in range(last_num + 1, source_num):
            # Each padding revision is a property change on the checked-out root.
            run_svn(["propset", "svn2svn:keep-revnum", rev_num, wc_target_tmp])
            output = run_svn(["commit", "-m", "", wc_target_tmp])
            rev_num_tmp = parse_svn_commit_rev(output) if output else None
            if rev_num != rev_num_tmp:
                # Explicit raise rather than "assert", so the check still runs under "python -O".
                raise InternalError("keep-revnum: expected padding commit to create r%s, but got r%s" % \
                    (rev_num, rev_num_tmp))
            ui.status("Committed revision %s (keep-revnum).", rev_num)
            target_rev_last = rev_num
    finally:
        # Don't leave the temporary checkout behind if a padding commit fails.
        if os.path.exists(wc_target_tmp):
            shutil.rmtree(wc_target_tmp)
    return target_rev_last
922
def disp_svn_log_summary(log_entry):
    """
    Display a separator line, a "revision | author | date" line and the
    commit message for the given log entry.

    Reads the 'revision', 'author', 'date' and 'message' values of log_entry;
    'date' is converted with int() and treated as a timestamp.
    """
    ui.status("------------------------------------------------------------------------")
    commit_time = datetime.fromtimestamp(int(log_entry['date']))
    header_fields = (log_entry['revision'],
                     log_entry['author'],
                     str(commit_time.isoformat(' ')))
    ui.status("r%s | %s | %s", *header_fields)
    ui.status(log_entry['message'])
930
def real_main(args, parser):
    """
    Replay the requested source revisions into the target URL.

    'args' is the list of positional command-line arguments; the first two
    entries are popped off as source_url and target_url.
    'parser' is the option parser, used only to report usage errors via
    parser.error().

    Steps:
      1. Validate both URLs and initialize the module-level source_*/target_*
         variables.
      2. Check-out (or re-use, in continue-mode) the '_wc_target' working copy
         and chdir into it.
      3. Unless in continue-mode, export+add+commit the source contents at the
         first matching source revision. In continue-mode, rebuild rev_map
         from the target repo instead.
      4. Replay every later source revision via process_svn_log_entry() and
         commit_from_svn_log_entry().

    Any exception raised while replaying is printed, and the working copy is
    cleaned-up and reverted; the exception is not re-raised.
    """
    global source_url, target_url, rev_map
    source_url = args.pop(0).rstrip("/")    # e.g. 'http://server/svn/source/trunk'
    target_url = args.pop(0).rstrip("/")    # e.g. 'file:///svn/target/trunk'
    ui.status("options: %s", str(options), level=ui.DEBUG, color='GREEN')

    # Make sure that both the source and target URL's are valid
    source_info = svnclient.get_svn_info(source_url)
    assert is_child_path(source_url, source_info['repos_url'])
    target_info = svnclient.get_svn_info(target_url)
    assert is_child_path(target_url, target_info['repos_url'])

    # Init global vars
    global source_repos_url,source_base,source_repos_uuid
    source_repos_url = source_info['repos_url']       # e.g. 'http://server/svn/source'
    source_base = source_url[len(source_repos_url):]  # e.g. '/trunk'
    source_repos_uuid = source_info['repos_uuid']
    global target_repos_url,target_base
    target_repos_url = target_info['repos_url']       # e.g. 'http://server/svn/target'
    target_base = target_url[len(target_repos_url):]  # e.g. '/trunk'

    # Init start and end revision
    try:
        source_start_rev = svnclient.get_svn_rev(source_repos_url, options.rev_start if options.rev_start else 1)
    except ExternalCommandFailed:
        parser.error("invalid start source revision value: %s" % (options.rev_start))
    try:
        source_end_rev = svnclient.get_svn_rev(source_repos_url, options.rev_end if options.rev_end else "HEAD")
    except ExternalCommandFailed:
        parser.error("invalid end source revision value: %s" % (options.rev_end))
    ui.status("Using source revision range %s:%s", source_start_rev, source_end_rev, level=ui.VERBOSE)

    # TODO: If options.keep_date, should we try doing a "svn propset" on an *existing* revision
    #       as a sanity check, so we check if the pre-revprop-change hook script is correctly setup
    #       before doing first replay-commit?

    target_rev_last = target_info['revision']   # Last revision # in the target repo
    wc_target = os.path.abspath('_wc_target')
    wc_target_tmp = os.path.abspath('_tmp_wc_target')
    num_entries_proc = 0
    commit_count = 0
    source_rev = None
    target_rev = None

    # Check out a working copy of target_url if needed
    wc_exists = os.path.exists(wc_target)
    if wc_exists and not options.cont_from_break:
        shutil.rmtree(wc_target)
        wc_exists = False
    if not wc_exists:
        ui.status("Checking-out _wc_target...", level=ui.VERBOSE)
        svnclient.svn_checkout(target_url, wc_target)
    os.chdir(wc_target)
    if wc_exists:
        # If using an existing WC, make sure it's clean ("svn revert")
        ui.status("Cleaning-up _wc_target...", level=ui.VERBOSE)
        run_svn(["cleanup"])
        full_svn_revert()

    if not options.cont_from_break:
        # TODO: Warn user if trying to start (non-continue) into a non-empty target path?
        # Get the first log entry at/after source_start_rev, which is where
        # we'll do the initial import from.
        source_ancestors = find_svn_ancestors(source_repos_url, source_base, source_end_rev, prefix=" ")
        it_log_start = svnclient.iter_svn_log_entries(source_url, source_start_rev, source_end_rev, get_changed_paths=False, ancestors=source_ancestors)
        source_start_log = None
        for log_entry in it_log_start:
            # Pick the first entry. Need to use a "for ..." loop since we're using an iterator.
            source_start_log = log_entry
            break
        if not source_start_log:
            raise InternalError("Unable to find any matching revisions between %s:%s in source_url: %s" % \
                (source_start_rev, source_end_rev, source_url))

        # This is the revision we will start from for source_url
        source_start_rev = int(source_start_log['revision'])
        ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE)
        ui.status("")
        # source_rev is still None at this point, so the padding check must be
        # based on source_start_rev, the revision about to be imported.
        if options.keep_revnum and source_start_rev > target_rev_last:
            target_rev_last = keep_revnum(source_start_rev, target_rev_last, wc_target_tmp)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_repos_url, source_start_rev, source_start_rev))
        # Export and add file-contents from source_url@source_start_rev
        source_start_url = source_url if not source_ancestors else source_repos_url+source_ancestors[-1]['copyfrom_path']
        top_paths = run_svn(["list", "-r", source_start_rev, source_start_url+"@"+str(source_start_rev)])
        top_paths = top_paths.strip("\n").split("\n")
        for path in top_paths:
            # For each top-level file/folder...
            if not path:
                continue
            # Directories have a trailing slash in the "svn list" output
            path_is_dir = path[-1] == "/"
            path_offset = path.rstrip('/') if path_is_dir else path
            if in_svn(path_offset, prefix=" "):
                raise InternalError("Cannot replay history on top of pre-existing structure: %s" % join_path(source_start_url, path_offset))
            if path_is_dir and not os.path.exists(path_offset):
                os.makedirs(path_offset)
            run_svn(["export", "--force", "-r" , source_start_rev, join_path(source_start_url, path_offset)+"@"+str(source_start_rev), path_offset])
            run_svn(["add", path_offset])
        # Update any properties on the newly added content
        paths = run_svn(["list", "--recursive", "-r", source_start_rev, source_start_url+"@"+str(source_start_rev)])
        paths = paths.strip("\n").split("\n")
        if options.keep_prop:
            sync_svn_props(source_start_url, source_start_rev, "")
        for path in paths:
            if not path:
                continue
            # Directories have a trailing slash in the "svn list" output
            path_is_dir = path[-1] == "/"
            path_offset = path.rstrip('/') if path_is_dir else path
            ui.status(" A %s", join_path(source_base, path_offset), level=ui.VERBOSE)
            if options.keep_prop:
                sync_svn_props(source_start_url, source_start_rev, path_offset)
        # Commit the initial import
        num_entries_proc += 1
        target_revprops = gen_tracking_revprops(source_start_rev)   # Build source-tracking revprop's
        target_rev = commit_from_svn_log_entry(source_start_log, target_revprops=target_revprops)
        if target_rev:
            # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
            set_rev_map(source_start_rev, target_rev)
            commit_count += 1
            target_rev_last = target_rev
            if options.verify:
                # Verify against the revision just imported (source_rev is still None here).
                verify_commit(source_start_rev, target_rev_last)
    else:
        # Re-build the rev_map based on any already-replayed history in target_url
        build_rev_map(target_url, target_rev_last, source_info)
        if not rev_map:
            parser.error("called with continue-mode, but no already-replayed source history found in target_url")
        # Pick the source rev which maps to the highest target rev.
        source_start_rev = int(max(rev_map, key=rev_map.get))
        assert source_start_rev
        ui.status("Continuing from source revision %s.", source_start_rev, level=ui.VERBOSE)
        ui.status("", level=ui.VERBOSE)

    if options.keep_revnum and source_start_rev < target_rev_last:
        parser.error("last target revision is equal-or-higher than starting source revision; "
                     "cannot use --keep-revnum mode")

    # Compare (major, minor) as a tuple of ints. Going through float() would turn
    # a version like 1.10 into 1.1, which wrongly compares lower than 1.7.
    svn_vers_t = svnclient.get_svn_client_version()
    svn_is_1_7_plus = tuple(int(v) for v in svn_vers_t[0:2]) >= (1, 7)

    # Load SVN log starting from source_start_rev + 1
    source_ancestors = find_svn_ancestors(source_repos_url, source_base, source_end_rev, prefix=" ")
    it_log_entries = svnclient.iter_svn_log_entries(source_url, source_start_rev+1, source_end_rev, get_revprops=True, ancestors=source_ancestors) if source_start_rev < source_end_rev else []
    source_rev = None

    # TODO: Now that commit_from_svn_log_entry() might try to do a "svn propset svn:date",
    #       we might want some better KeyboardInterupt handilng here, to ensure that
    #       commit_from_svn_log_entry() always runs as an atomic unit.
    try:
        for log_entry in it_log_entries:
            if options.entries_proc_limit:
                if num_entries_proc >= options.entries_proc_limit:
                    break
            # Replay this revision from source_url into target_url
            source_rev = log_entry['revision']
            if options.keep_revnum:
                target_rev_last = keep_revnum(source_rev, target_rev_last, wc_target_tmp)
            disp_svn_log_summary(log_entry)
            # Process all the changed-paths in this log entry
            commit_paths = []
            process_svn_log_entry(log_entry, source_ancestors, commit_paths)
            num_entries_proc += 1
            # Commit any changes made to _wc_target
            target_revprops = gen_tracking_revprops(source_rev)   # Build source-tracking revprop's
            target_rev = commit_from_svn_log_entry(log_entry, commit_paths, target_revprops=target_revprops)
            if target_rev:
                # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
                source_rev = log_entry['revision']
                set_rev_map(source_rev, target_rev)
                target_rev_last = target_rev
                commit_count += 1
                if options.verify:
                    verify_commit(source_rev, target_rev_last, log_entry)
                # Run "svn cleanup" every 100 commits if SVN 1.7+, to clean-up orphaned ".svn/pristines/*"
                if svn_is_1_7_plus and (commit_count % 100 == 0):
                    run_svn(["cleanup"])
        if not source_rev:
            # If there were no new source_url revisions to process, init source_rev
            # for the "finally" message below to be the last source revision replayed.
            source_rev = source_start_rev
            if options.verify:
                verify_commit(source_start_rev, target_rev_last)

    except KeyboardInterrupt:
        print("\nStopped by user.")
        print("\nCleaning-up...")
        run_svn(["cleanup"])
        full_svn_revert()
    except:
        # Catch-all: whatever failed, report it and still clean-up/revert the
        # working copy. The exception is not re-raised.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        print("\nCleaning-up...")
        run_svn(["cleanup"])
        print(run_svn(["status"]))
        full_svn_revert()
    finally:
        print("\nFinished at source revision %s%s." % (source_rev, " (dry-run)" if options.dry_run else ""))
1133
def main():
    """
    Command-line entry point. Must be callable without arguments.

    Builds the option parser, parses the command line into the module-level
    'options', derives options.rev_start / options.rev_end from --revision,
    expands --archive and --dry-run into the options they imply, then
    hands-off to real_main().

    Returns whatever real_main() returns. Usage errors are reported through
    parser.error().
    """
    usage = "svn2svn, version %s\n" % str(full_version) + \
            "<http://nynim.org/projects/svn2svn> <https://github.com/tonyduckles/svn2svn>\n\n" + \
            "Usage: %prog [OPTIONS] source_url target_url\n"
    description = """\
Replicate (replay) history from one SVN repository to another. Maintain
logical ancestry wherever possible, so that 'svn log' on the replayed repo
will correctly follow file/folder renames.

Examples:
Create a copy of only /trunk from source repo, starting at r5000
$ svnadmin create /svn/target
$ svn mkdir -m 'Add trunk' file:///svn/target/trunk
$ svn2svn -av -r 5000 http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target
2. The first commit to http://server/source/trunk at/after r5000 will be
exported & added into _wc_target
3. All revisions affecting http://server/source/trunk (starting at r5000)
will be replayed to _wc_target. Any add/copy/move/replaces that are
copy-from'd some path outside of /trunk (e.g. files renamed on a
/branch and branch was merged into /trunk) will correctly maintain
logical ancestry where possible.

Use continue-mode (-c) to pick-up where the last run left-off
$ svn2svn -avc http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target, if not already
checked-out
2. All new revisions affecting http://server/source/trunk starting from
the last replayed revision to file:///svn/target/trunk (based on the
svn2svn:* revprops) will be replayed to _wc_target, maintaining all
logical ancestry where possible."""
    parser = optparse.OptionParser(usage, description=description,
                formatter=HelpFormatter(), version="%prog "+str(full_version))
    parser.add_option("-v", "--verbose", dest="verbosity", action="count", default=1,
                      help="enable additional output (use -vv or -vvv for more)")
    parser.add_option("-a", "--archive", action="store_true", dest="archive", default=False,
                      help="archive/mirror mode; same as -UDP (see REQUIRE's below)\n"
                           "maintain same commit author, same commit time, and file/dir properties")
    parser.add_option("-U", "--keep-author", action="store_true", dest="keep_author", default=False,
                      help="maintain same commit authors (svn:author) as source\n"
                           "(REQUIRES target_url be non-auth'd, e.g. file://-based, since this uses --username to set author)")
    parser.add_option("-D", "--keep-date", action="store_true", dest="keep_date", default=False,
                      help="maintain same commit time (svn:date) as source\n"
                           "(REQUIRES 'pre-revprop-change' hook script to allow 'svn:date' changes)")
    parser.add_option("-P", "--keep-prop", action="store_true", dest="keep_prop", default=False,
                      help="maintain same file/dir SVN properties as source")
    parser.add_option("-R", "--keep-revnum", action="store_true", dest="keep_revnum", default=False,
                      help="maintain same rev #'s as source. creates placeholder target "
                           "revisions (by modifying a 'svn2svn:keep-revnum' property at the root of the target repo)")
    parser.add_option("-c", "--continue", action="store_true", dest="cont_from_break",
                      help="continue from last source commit to target (based on svn2svn:* revprops)")
    parser.add_option("-r", "--revision", type="string", dest="revision", metavar="ARG",
                      help="revision range to replay from source_url\n"
                           "A revision argument can be one of:\n"
                           " START start rev # (end will be 'HEAD')\n"
                           " START:END start and ending rev #'s\n"
                           "Any revision # formats which SVN understands are "
                           "supported, e.g. 'HEAD', '{2010-01-31}', etc.")
    parser.add_option("-u", "--log-author", action="store_true", dest="log_author", default=False,
                      help="append source commit author to replayed commit mesages")
    parser.add_option("-d", "--log-date", action="store_true", dest="log_date", default=False,
                      help="append source commit time to replayed commit messages")
    parser.add_option("-l", "--limit", type="int", dest="entries_proc_limit", metavar="NUM",
                      help="maximum number of source revisions to process")
    parser.add_option("-n", "--dry-run", action="store_true", dest="dry_run", default=False,
                      help="process next source revision but don't commit changes to "
                           "target working-copy (forces --limit=1)")
    parser.add_option("-x", "--verify", action="store_const", const=1, dest="verify",
                      help="verify ancestry and content for changed paths in commit after every target commit or last target commit")
    parser.add_option("-X", "--verify-all", action="store_const", const=2, dest="verify",
                      help="verify ancestry and content for entire target_url tree after every target commit or last target commit")
    parser.add_option("--debug", dest="verbosity", const=ui.DEBUG, action="store_const",
                      help="enable debugging output (same as -vvv)")
    global options
    options, args = parser.parse_args()
    if len(args) != 2:
        parser.error("incorrect number of arguments")
    if options.verbosity < 10:
        # Expand multiple "-v" arguments to a real ui._level value
        options.verbosity *= 10
    if options.dry_run:
        # When in dry-run mode, only try to process the next log_entry
        options.entries_proc_limit = 1
    options.rev_start = None
    options.rev_end = None
    if options.revision:
        # Reg-ex for matching a revision arg (http://svnbook.red-bean.com/en/1.5/svn.tour.revs.specifiers.html#svn.tour.revs.dates)
        # Raw string: same pattern as before, without relying on Python passing
        # the unrecognized "\{" escape through unchanged.
        rev_patt = r'[0-9A-Z]+|\{[0-9A-Za-z/\ :-]+\}'
        match = re.match('^('+rev_patt+'):('+rev_patt+')$', options.revision)  # First try start:end match
        if match is None:
            match = re.match('^('+rev_patt+')$', options.revision)             # Next, try start match
        if match is None:
            parser.error("unexpected --revision argument format; see 'svn help log' for valid revision formats")
        # One group for a "START" match, two groups for a "START:END" match.
        rev = match.groups()
        options.rev_start = rev[0]
        options.rev_end = rev[1] if len(rev)>1 else None
    if options.archive:
        options.keep_author = True
        options.keep_date = True
        options.keep_prop = True
    ui.update_config(options)
    return real_main(args, parser)
1237
1238
if __name__ == "__main__":
    # Exit with main()'s return value, or 0 when it returns nothing/falsy.
    exit_status = main() or 0
    sys.exit(exit_status)