]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/run/svn2svn.py
Correctly handle --keep-revnum mode during initial import
[svn2svn.git] / svn2svn / run / svn2svn.py
1 """
2 Replicate (replay) changesets from one SVN repository to another.
3 """
4
5 from .. import base_version, full_version
6 from .. import ui
7 from .. import svnclient
8 from ..shell import run_svn
9 from ..errors import (ExternalCommandFailed, UnsupportedSVNAction, InternalError, VerificationError)
10 from parse import HelpFormatter
11
12 import sys
13 import os
14 import time
15 import traceback
16 import shutil
17 import operator
18 import optparse
19 import re
20 from datetime import datetime
21
# Single-letter action codes accepted from "svn log" changed-path entries
# (M, A, R, D). Membership is tested with the "in" operator on this string.
_valid_svn_actions = "MARD"

# Module-level state shared by the functions below.
source_url = ""         # URL to source path in source SVN repo, e.g. 'http://server/svn/source/trunk'
source_repos_url = ""   # URL to root of source SVN repo, e.g. 'http://server/svn/source'
source_base = ""        # Relative path of source_url in source SVN repo, e.g. '/trunk'
source_repos_uuid = ""  # UUID of source SVN repo
target_url =""          # URL to target path in target SVN repo, e.g. 'file:///svn/repo_target/trunk'
rev_map = {}            # Running mapping of source_url rev #'s -> target_url rev #'s (int -> int)
options = None          # optparse options object; None here, presumably assigned during startup
32
def parse_svn_commit_rev(output):
    """
    Extract the new revision number from the text printed by "svn commit".

    The first line starting with 'Committed revision ' is used; the trailing
    period is dropped and the remainder is converted to an int. An
    AssertionError is raised when no such line is present.
    """
    marker = 'Committed revision '
    candidates = (text[len(marker):].rstrip('.')
                  for text in output.strip("\n").split("\n")
                  if text.startswith(marker))
    rev_text = next(candidates, None)
    assert rev_text is not None
    return int(rev_text)
45
def commit_from_svn_log_entry(log_entry, commit_paths=None, target_revprops=None):
    """
    Run "svn commit" in the current working copy, using the message, author
    and revprops of the given source "svn log" entry.

    'log_entry' is the source log entry; its 'date', 'message', 'author',
        'revprops' and 'date_raw' values are read here.
    'commit_paths' is an optional collection of changed working-copy paths.
        It is passed to "svn commit" only when it holds fewer than 100
        entries; otherwise the commit runs with no explicit paths.
    'target_revprops' is an optional list of {'name':..., 'value':...}
        revprops which are applied on top of the source revprops.

    Returns the committed revision number, or None in dry-run mode or when
    "svn commit" printed nothing.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # In -vv (or higher) mode, show the pending working-copy status first.
    if ui.get_level() >= ui.EXTRA:
        ui.status(">> commit_from_svn_log_entry: Pre-commit _wc_target status:", level=ui.EXTRA, color='CYAN')
        ui.status(run_svn(["status"]), level=ui.EXTRA, color='CYAN')

    # Commit time rendered in the local timezone.
    # (For UTC commit times, "%d 0" % int(log_entry['date']) could be used instead.)
    svn_date = str(datetime.fromtimestamp(int(log_entry['date'])))

    # Build the log message, optionally tagging the original date/author.
    message = log_entry['message']
    if options.log_date:
        message += "\nDate: " + svn_date
    if options.log_author:
        message += "\nAuthor: " + log_entry['author']

    args = ["commit", "--force-log"]
    if options.keep_author:
        args += ["--username", log_entry['author']]
    args += ["-m", message]

    # Source revprops go in first so that target_revprops can override them.
    revprops = {}
    for prop_list in (log_entry['revprops'], target_revprops):
        if prop_list:
            for item in prop_list:
                revprops[item['name']] = item['value']
    for name, value in revprops.items():
        args += ["--with-revprop", "%s=%s" % (name, str(value))]

    # Only name individual paths when the list is reasonably short; otherwise
    # commit from the root of the working copy.
    if commit_paths and len(commit_paths) < 100:
        args += list(commit_paths)

    if options.dry_run:
        return None

    # Run the commit and screen-scrape the resulting target revision (if any).
    output = run_svn(args)
    rev_num = parse_svn_commit_rev(output) if output else None
    if rev_num is not None:
        ui.status("Committed revision %s.", rev_num)
        if options.keep_date:
            run_svn(["propset", "--revprop", "-r", rev_num, "svn:date", log_entry['date_raw']])
    return rev_num
97
def full_svn_revert():
    """
    Do an "svn revert" and proactively remove any extra files in the working copy.

    After the recursive revert, every path that "svn status" reports with a
    leading "?" is deleted from disk: files via os.remove(), directories via
    shutil.rmtree().
    """
    run_svn(["revert", "--recursive", "."])
    output = run_svn(["status"])
    if not output:
        return
    for line in output.strip("\n").split("\n"):
        # Use startswith() rather than line[0]: an empty line in the status
        # output would otherwise raise IndexError.
        if not line.startswith("?"):
            continue
        path = line[4:].strip(" ")
        if os.path.isfile(path):
            os.remove(path)
        elif os.path.isdir(path):
            shutil.rmtree(path)
113
def gen_tracking_revprops(source_rev):
    """
    Return the svn2svn source-tracking revprops for 'source_rev', as a list
    of {'name': ..., 'value': ...} dicts (source UUID, source URL, source rev).
    """
    tracking = (
        ('svn2svn:source_uuid', source_repos_uuid),
        ('svn2svn:source_url', source_url),
        ('svn2svn:source_rev', source_rev),
    )
    return [{'name': name, 'value': value} for name, value in tracking]
122
def sync_svn_props(source_url, source_rev, path_offset):
    """
    Make the SVN properties of 'path_offset' in the target working copy match
    those of the same path in the source repo at 'source_rev'.

    "svn:mergeinfo" is never carried forward from the source.
    """
    source_props = svnclient.get_all_props(source_url+"/"+path_offset, source_rev)
    target_props = svnclient.get_all_props(path_offset)

    # Never carry-forward "svn:mergeinfo"
    if 'svn:mergeinfo' in source_props:
        del source_props['svn:mergeinfo']

    # Drop properties which exist in target but not in source.
    stale_names = [name for name in target_props if name not in source_props]
    for name in stale_names:
        run_svn(["propdel", name, path_offset])

    # Set properties which are missing from target or whose value differs.
    for name in source_props:
        if name in target_props and source_props[name] == target_props[name]:
            continue
        run_svn(["propset", name, source_props[name], path_offset])
143
def in_svn(p, require_in_repo=False, prefix=""):
    """
    Report whether the file/folder 'p' is tracked by Subversion, based on
    the first status entry returned for it.

    ".svn" directories cannot be probed for directly: since SVN 1.7 (WC-NG)
    only the working-copy root has one, so the status entry is used instead.

    'require_in_repo', when true, makes locally-added paths (status 'added'
        or no revision) count as not tracked.
    'prefix' is prepended to the debug status message.
    """
    entries = svnclient.get_svn_status(p, no_recursive=True)
    if not entries:
        return False
    entry = entries[0]
    if require_in_repo and (entry['status'] == 'added' or entry['revision'] is None):
        # Caller needs the path to exist in the repo itself, so a path which
        # is only locally added does not qualify.
        tracked = False
    else:
        # Paths deleted in the WC are not considered tracked; paths which are
        # locally added/copied are.
        is_deleted = entry['status'] == 'deleted'
        tracked = bool((not is_deleted) and
                       (entry['type'] == 'normal' or
                        entry['status'] == 'added' or
                        entry['copied'] == 'true'))
    ui.status(prefix + ">> in_svn('%s', require_in_repo=%s) --> %s", p, str(require_in_repo), str(tracked), level=ui.DEBUG, color='GREEN')
    return tracked
165
def is_child_path(path, p_path):
    """
    Return True when 'path' equals 'p_path' or lies underneath it, i.e. it
    starts with 'p_path' followed by a "/".
    """
    if path == p_path:
        return True
    return path.startswith(p_path + "/")
168
def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix = ""):
    """
    Given a source path, walk the SVN history backwards to inspect the ancestry of
    that path, seeing if it traces back to base_path. Build an array of copyfrom_path
    and copyfrom_revision pairs for each of the "svn copies". If we find a copyfrom_path
    which is base_path itself or a child of it (e.g. we crawled back to the initial
    branch-copy from trunk), then return the collection of ancestor paths. Otherwise,
    the path has no ancestry relative to base_path.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'svn_repos_url' is the full URL to the root of the SVN repository,
      e.g. 'file:///path/to/repo'
    'base_path' is the path in the SVN repo to the target path we're trying to
      trace ancestry back to, e.g. '/trunk'.
    'source_path' is the offset (below base_path) of the path to start checking
      ancestry at; the walk starts from base_path+"/"+source_path.
    'source_rev' is the revision to start walking the history of source_path backwards from.
    'prefix' is prepended to every debug status message.

    Returns a list of {'path': ..., 'revision': ...} dicts. The first entry is the
    starting path at source_rev and each following entry is the copy-from location
    of the previous one. The list is empty when no copy-from chain was collected.
    """
    ui.status(prefix + ">> find_svn_ancestors: Start: (%s) source_path: %s base_path: %s",
        svn_repos_url, source_path+"@"+str(source_rev), base_path, level=ui.DEBUG, color='YELLOW')
    done = False
    working_path = base_path+"/"+source_path
    working_rev = source_rev
    first_iter_done = False
    # Raw copy records, in the order they are discovered while walking backwards.
    ancestors_temp = []
    while not done:
        # Get the first "svn log" entry for this path (relative to @rev)
        ui.status(prefix + ">> find_svn_ancestors: %s", svn_repos_url + working_path+"@"+str(working_rev), level=ui.DEBUG, color='YELLOW')
        log_entry = svnclient.get_first_svn_log_entry(svn_repos_url + working_path, 1, working_rev, True)
        if not log_entry:
            ui.status(prefix + ">> find_svn_ancestors: Done: no log_entry", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        # If we found a copy-from case which matches our base_path, we're done.
        # ...but only if we've at least tried to search for the first copy-from path.
        # (On the first iteration working_path is always under base_path by construction.)
        if first_iter_done and is_child_path(working_path, base_path):
            ui.status(prefix + ">> find_svn_ancestors: Done: Found is_child_path(working_path, base_path) and first_iter_done=True", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        first_iter_done = True
        # Search for any actions on our target path (or parent paths).
        # NOTE(review): "path in working_path" is a substring test, not a
        # path-prefix test -- confirm that is the intended match.
        changed_paths_temp = []
        for d in log_entry['changed_paths']:
            path = d['path']
            if path in working_path:
                changed_paths_temp.append({'path': path, 'data': d})
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the chain and this path has no ancestry back to base_path.
            ui.status(prefix + ">> find_svn_ancestors: Done: No matching changed_paths", level=ui.DEBUG, color='YELLOW')
            done = True
            # "continue" re-tests the loop condition, which is now false, so the loop ends.
            continue
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
        # Find the action for our working_path in this revision. Check in reverse order, so that
        # a target file/folder which is "M" can still pick up a parent folder's "A" copy-from.
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            action = d['action']
            if action not in _valid_svn_actions:
                raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                    % (log_entry['revision'], action))
            ui.status(prefix + "> %s %s%s", action, path,
                (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                level=ui.DEBUG, color='YELLOW')
            if action == 'D':
                # If file/folder was deleted, it has no ancestor
                ancestors_temp = []
                ui.status(prefix + ">> find_svn_ancestors: Done: deleted", level=ui.DEBUG, color='YELLOW')
                done = True
                break
            if action in 'RA':
                # If file/folder was added/replaced but not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    ancestors_temp = []
                    ui.status(prefix + ">> find_svn_ancestors: Done: %s with no copyfrom_path",
                        "Added" if action == "A" else "Replaced",
                        level=ui.DEBUG, color='YELLOW')
                    done = True
                    break
                # Else, file/folder was added/replaced and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                ui.status(prefix + ">> find_svn_ancestors: Found copy-from (action=%s): %s --> %s",
                    action, path, d['copyfrom_path']+"@"+str(d['copyfrom_revision']),
                    level=ui.DEBUG, color='YELLOW')
                ancestors_temp.append({'path': path, 'revision': log_entry['revision'],
                    'copyfrom_path': d['copyfrom_path'], 'copyfrom_rev': d['copyfrom_revision']})
                # NOTE(review): str.replace() substitutes every occurrence of d['path'],
                # not only a leading one -- confirm paths cannot repeat the segment.
                working_path = working_path.replace(d['path'], d['copyfrom_path'])
                working_rev = d['copyfrom_revision']
                # Follow the copy and keep on searching
                break
    # Turn the raw copy records into the final chain by re-applying each
    # path substitution in turn, starting from the original path.
    ancestors = []
    if ancestors_temp:
        ancestors.append({'path': base_path+"/"+source_path, 'revision': source_rev})
        working_path = base_path+"/"+source_path
        for idx in range(len(ancestors_temp)):
            d = ancestors_temp[idx]
            working_path = working_path.replace(d['path'], d['copyfrom_path'])
            working_rev = d['copyfrom_rev']
            ancestors.append({'path': working_path, 'revision': working_rev})
        if ui.get_level() >= ui.DEBUG:
            # Width of the longest "path@rev" label, used to align the debug table.
            max_len = 0
            for idx in range(len(ancestors)):
                d = ancestors[idx]
                max_len = max(max_len, len(d['path']+"@"+str(d['revision'])))
            ui.status(prefix + ">> find_svn_ancestors: Found parent ancestors:", level=ui.DEBUG, color='YELLOW_B')
            for idx in range(len(ancestors)-1):
                d = ancestors[idx]
                d_next = ancestors[idx+1]
                ui.status(prefix + " [%s] %s <-- %s", idx,
                    str(d['path']+"@"+str(d['revision'])).ljust(max_len),
                    str(d_next['path']+"@"+str(d_next['revision'])).ljust(max_len),
                    level=ui.DEBUG, color='YELLOW')
    else:
        ui.status(prefix + ">> find_svn_ancestors: No ancestor-chain found: %s",
            svn_repos_url+base_path+"/"+source_path+"@"+str(source_rev), level=ui.DEBUG, color='YELLOW')
    return ancestors
290
def get_rev_map(source_rev, prefix):
    """
    Look up the target-repo revision which corresponds to 'source_rev'.

    Uses the highest rev_map entry that is less-than-or-equal-to 'source_rev'.
    Returns the mapped target revision as an int, or None when no entry at
    or below 'source_rev' exists.
    """
    ui.status(prefix + ">> get_rev_map(%s)", source_rev, level=ui.DEBUG, color='GREEN')
    candidate = int(source_rev)
    while candidate > 0:
        known = candidate in rev_map
        ui.status(prefix + ">> get_rev_map: rev=%s in_rev_map=%s", candidate, str(known), level=ui.DEBUG, color='BLACK_B')
        if known:
            return int(rev_map[candidate])
        candidate -= 1
    # Fell off the bottom of rev_map without finding anything.
    return None
303
def set_rev_map(source_rev, target_rev):
    """
    Record in rev_map that 'source_rev' was replayed as 'target_rev'.
    Both values are stored as ints.
    """
    global rev_map
    ui.status(">> set_rev_map: source_rev=%s target_rev=%s", source_rev, target_rev, level=ui.DEBUG, color='GREEN')
    target_num = int(target_rev)
    source_num = int(source_rev)
    rev_map[source_num] = target_num
308
def build_rev_map(target_url, target_end_rev, source_info):
    """
    Check for any already-replayed history from source_url (source_info) and
    build the mapping-table of source_rev -> target_rev.

    'target_url' is the target URL whose log is scanned, from revision 1
        through 'target_end_rev'.
    'source_info' supplies the 'repos_uuid' and 'url' values that a target
        revision's svn2svn:source_uuid / svn2svn:source_url revprops must
        match for the revision to be recorded.

    The module-level rev_map is reset and then filled via set_rev_map().
    Target revisions which lack any of the three svn2svn tracking revprops
    are skipped.
    """
    global rev_map
    rev_map = {}
    ui.status("Rebuilding target_rev -> source_rev rev_map...", level=ui.VERBOSE)
    it_log_entries = svnclient.iter_svn_log_entries(target_url, 1, target_end_rev, get_changed_paths=False, get_revprops=True)
    for log_entry in it_log_entries:
        if not log_entry['revprops']:
            continue
        # Collect only the svn2svn-specific revprops of this target revision.
        revprops = {}
        for v in log_entry['revprops']:
            if v['name'].startswith('svn2svn:'):
                revprops[v['name']] = v['value']
        # Use .get() so that a revision carrying only some svn2svn:* revprops
        # is skipped rather than raising KeyError.
        if revprops.get('svn2svn:source_uuid') != source_info['repos_uuid']:
            continue
        if revprops.get('svn2svn:source_url') != source_info['url']:
            continue
        source_rev = revprops.get('svn2svn:source_rev')
        if source_rev is None:
            continue
        set_rev_map(source_rev, log_entry['revision'])
331
def get_svn_dirlist(svn_path, rev_number = ""):
    """
    Return the entries printed by "svn list" for the given folder path, as a
    list of strings (one per output line).

    When 'rev_number' is given, the listing is taken at that revision (both
    as "-r" and as a peg revision). An empty list is returned when the
    command output is at most one character long.
    """
    if rev_number:
        cmd = ["list", "-r", rev_number, svn_path + "@" + str(rev_number)]
    else:
        cmd = ["list", svn_path]
    listing = run_svn(cmd, no_fail=True)
    if len(listing) > 1:
        return listing.strip("\n").split("\n")
    return []
345
def path_in_list(paths, path):
    """
    Return True when 'path' equals, or is a child of, any entry in 'paths'.
    """
    return any(is_child_path(path, candidate) for candidate in paths)
351
def add_path(paths, path):
    """
    Append 'path' to 'paths' unless it is already covered by an existing
    entry (the same path, or a parent of it).
    """
    if path_in_list(paths, path):
        return
    paths.append(path)
355
def do_svn_add(path_offset, source_rev, parent_copyfrom_path="", parent_copyfrom_rev="", \
        export_paths=None, is_dir = False, skip_paths=None, prefix = ""):
    """
    Given the add'd source path, replay the "svn add/copy" commands to correctly
    track renames across copy-from's.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2 " Rename folder
    4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt " Rename file inside renamed folder
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'path_offset' is the offset from source_base to the file to check ancestry for,
      e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'parent_copyfrom_path' and 'parent_copyfrom_rev' is the copy-from path of the parent
      directory, when being called recursively by do_svn_add_dir().
    'export_paths' is the list of path_offset's that we've deferred running "svn export" on.
      It is appended to in place; a fresh list is used when omitted.
    'is_dir' is whether path_offset is a directory (rather than a file).
    'skip_paths' is the list of path_offset's that do_svn_add_dir() must not recurse
      into; an empty list is used when omitted.
    'prefix' is prepended to the debug status messages.
    """
    # Never use mutable objects as defaults: they are shared between calls.
    # (The old export_paths default was also a dict, which has no .append()
    # for add_path() to call.)
    if export_paths is None:
        export_paths = []
    if skip_paths is None:
        skip_paths = []
    ui.status(prefix + ">> do_svn_add: %s %s", source_base+"/"+path_offset+"@"+str(source_rev),
        " (parent-copyfrom: "+parent_copyfrom_path+"@"+str(parent_copyfrom_rev)+")" if parent_copyfrom_path else "",
        level=ui.DEBUG, color='GREEN')
    # Check if the given path has ancestors which chain back to the current source_base
    found_ancestor = False
    tgt_rev = None
    ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset, source_rev, prefix+" ")
    # ancestors[n] is the original (pre-branch-copy) trunk path.
    # ancestors[n-1] is the first commit on the new branch.
    copyfrom_path = ancestors[len(ancestors)-1]['path'] if ancestors else ""
    copyfrom_rev = ancestors[len(ancestors)-1]['revision'] if ancestors else ""
    if ancestors:
        # The copy-from path has ancestry back to source_url.
        ui.status(prefix + ">> do_svn_add: Check copy-from: Found parent: %s", copyfrom_path+"@"+str(copyfrom_rev),
            level=ui.DEBUG, color='GREEN', bold=True)
        found_ancestor = True
        # Map the copyfrom_rev (source repo) to the equivalent target repo rev #. This can
        # return None in the case where copyfrom_rev is *before* our source_start_rev.
        tgt_rev = get_rev_map(copyfrom_rev, prefix+" ")
        ui.status(prefix + ">> do_svn_add: get_rev_map: %s (source) -> %s (target)", copyfrom_rev, tgt_rev, level=ui.DEBUG, color='GREEN')
    else:
        ui.status(prefix + ">> do_svn_add: Check copy-from: No ancestor chain found.", level=ui.DEBUG, color='GREEN')
        found_ancestor = False
    if found_ancestor and tgt_rev:
        # Check if this path_offset in the target WC already has this ancestry, in which
        # case there's no need to run the "svn copy" (again).
        path_in_svn = in_svn(path_offset, prefix=prefix+" ")
        log_entry = svnclient.get_last_svn_log_entry(path_offset, 1, 'HEAD', get_changed_paths=False) if in_svn(path_offset, require_in_repo=True, prefix=prefix+" ") else []
        if (not log_entry or (log_entry['revision'] != tgt_rev)):
            copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
            ui.status(prefix + ">> do_svn_add: svn_copy: Copy-from: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " copyfrom: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " p_copyfrom: %s", parent_copyfrom_path+"@"+str(parent_copyfrom_rev) if parent_copyfrom_path else "", level=ui.DEBUG, color='GREEN')
            if path_in_svn and \
               ((parent_copyfrom_path and is_child_path(copyfrom_path, parent_copyfrom_path)) and \
                (parent_copyfrom_rev and copyfrom_rev == parent_copyfrom_rev)):
                # When being called recursively, if this child entry has the same ancestor as
                # the parent, then no need to try to run another "svn copy".
                ui.status(prefix + ">> do_svn_add: svn_copy: Same ancestry as parent: %s",
                    parent_copyfrom_path+"@"+str(parent_copyfrom_rev),level=ui.DEBUG, color='GREEN')
            else:
                # Copy this path from the equivalent path+rev in the target repo, to create the
                # equivalent history.
                if parent_copyfrom_path:
                    # If we have a parent copy-from path, we mis-match that so display a status
                    # message describing the action we're mimic'ing. If path_in_svn, then this
                    # is logically a "replace" rather than an "add".
                    ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), source_base+"/"+path_offset, ancestors[1]['path']+"@"+str(copyfrom_rev), level=ui.VERBOSE)
                if path_in_svn:
                    # If local file is already under version-control, then this is a replace.
                    ui.status(prefix + ">> do_svn_add: pre-copy: local path already exists: %s", path_offset, level=ui.DEBUG, color='GREEN')
                    run_svn(["remove", "--force", path_offset])
                run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset])
                if is_dir:
                    # Export the final version of all files in this folder.
                    add_path(export_paths, path_offset)
                else:
                    # Export the final version of this file.
                    run_svn(["export", "--force", "-r", source_rev,
                             source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])
                if options.keep_prop:
                    sync_svn_props(source_url, source_rev, path_offset)
        else:
            ui.status(prefix + ">> do_svn_add: Skipped 'svn copy': %s", path_offset, level=ui.DEBUG, color='GREEN')
    else:
        # Else, either this copy-from path has no ancestry back to source_url OR copyfrom_rev comes
        # before our initial source_start_rev (i.e. tgt_rev == None), so can't do a "svn copy".
        # Create (parent) directory if needed.
        # TODO: This is (nearly) a duplicate of code in process_svn_log_entry(). Should this be
        # split-out to a shared tag?
        p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or '.'
        if not os.path.exists(p_path):
            run_svn(["mkdir", p_path])
        if not in_svn(path_offset, prefix=prefix+" "):
            if is_dir:
                # Export the final version of all files in this folder.
                add_path(export_paths, path_offset)
            else:
                # Export the final version of this file. We *need* to do this before running
                # the "svn add", even if we end-up re-exporting this file again via export_paths.
                run_svn(["export", "--force", "-r", source_rev,
                         source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])
            # If not already under version-control, then "svn add" this file/folder.
            run_svn(["add", "--parents", path_offset])
        if options.keep_prop:
            sync_svn_props(source_url, source_rev, path_offset)
    if is_dir:
        # For any folders that we process, process any child contents, so that we correctly
        # replay copies/replaces/etc.
        do_svn_add_dir(path_offset, source_rev, copyfrom_path, copyfrom_rev, export_paths, skip_paths, prefix+" ")
472
def do_svn_add_dir(path_offset, source_rev, parent_copyfrom_path, parent_copyfrom_rev, \
        export_paths, skip_paths, prefix=""):
    """
    Bring the children of the folder 'path_offset' in the target working copy
    in line with the same folder in the source repo at 'source_rev'.

    Each remote child which is not in 'skip_paths' is handed to do_svn_add();
    each local child which is missing from the remote listing is removed
    with "svn remove --force".
    """
    # Compare the folder listing of the local WC (target_url) against the remote repo (source_url).
    # TODO: paths_local won't include add'd paths because "svn ls" lists the contents of the
    # associated remote repo folder. (Is this a problem?)
    paths_local = get_svn_dirlist(path_offset)
    paths_remote = get_svn_dirlist(source_url+"/"+path_offset, source_rev)
    ui.status(prefix + ">> do_svn_add_dir: paths_local: %s", str(paths_local), level=ui.DEBUG, color='GREEN')
    ui.status(prefix + ">> do_svn_add_dir: paths_remote: %s", str(paths_remote), level=ui.DEBUG, color='GREEN')

    # Add/update everything which exists remotely.
    for entry in paths_remote:
        # Folder entries in the listing carry a trailing "/".
        entry_is_dir = entry[-1] == "/"
        child_name = entry.rstrip('/') if entry_is_dir else entry
        child_offset = path_offset + "/" + child_name
        if child_offset in skip_paths:
            continue
        do_svn_add(child_offset, source_rev, parent_copyfrom_path, parent_copyfrom_rev,
                   export_paths, entry_is_dir, skip_paths, prefix+" ")

    # Remove everything which exists locally but no longer remotely.
    for entry in paths_local:
        if entry in paths_remote:
            continue
        ui.status(" %s %s", 'D', source_base+"/"+path_offset+"/"+entry, level=ui.VERBOSE)
        run_svn(["remove", "--force", path_offset+"/"+entry])
        # TODO: Does this handle deleted folders too? Wouldn't want to have a case
        # where we only delete all files from folder but leave orphaned folder around.
496
def process_svn_log_entry(log_entry, commit_paths, prefix = ""):
    """
    Replay the changes of one source "svn log" entry into the current
    (target) working copy.

    'log_entry' is the source log entry; its 'revision' and 'changed_paths'
      values are read here. A changed-path 'kind' which is empty gets filled
      in (the dict is modified in place).
    'commit_paths' is a caller-owned list. The offset (relative to
      source_base) of every processed changed-path is appended to it, i.e.
      the paths which we'll pass to "svn commit".
    'prefix' is prepended to the debug status messages.

    Raises UnsupportedSVNAction for an action code outside _valid_svn_actions.
    Nothing is returned.
    """
    # Folders whose recursive "svn export" is deferred until every changed
    # path has been processed (see the loop at the end of this function).
    export_paths = []
    source_rev = log_entry['revision']
    ui.status(prefix + ">> process_svn_log_entry: %s", source_url+"@"+str(source_rev), level=ui.DEBUG, color='GREEN')
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not is_child_path(path, source_base):
            # Ignore changed files that are not part of this subdir
            ui.status(prefix + ">> process_svn_log_entry: Unrelated path: %s (base: %s)", path, source_base, level=ui.DEBUG, color='GREEN')
            continue
        if d['kind'] == "":
            # The "kind" value was introduced in SVN 1.6, and "svn log --xml" won't return a "kind"
            # value for commits made on a pre-1.6 repo, even if the server is now running 1.6.
            # We need to use other methods to fetch the node-kind for these cases.
            d['kind'] = svnclient.get_kind(source_repos_url, path, source_rev, d['action'], log_entry['changed_paths'])
        assert (d['kind'] == 'file') or (d['kind'] == 'dir')
        path_is_dir = True if d['kind'] == 'dir' else False
        path_is_file = True if d['kind'] == 'file' else False
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path
        action = d['action']
        if action not in _valid_svn_actions:
            raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                % (source_rev, action))
        ui.status(" %s %s%s", action, d['path'],
            (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
            level=ui.VERBOSE)

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        commit_paths.append(path_offset)

        # Special-handling for replace's
        if action == 'R':
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            if in_svn(path_offset):
                # Target path might not be under version-control yet, e.g. parent "add"
                # was a copy-from a branch which had no ancestry back to trunk, and each
                # child folder under that parent folder is a "replace" action on the final
                # merge to trunk. Since the child folders will be in skip_paths, do_svn_add
                # wouldn't have created them while processing the parent "add" path.
                run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            # Determine where to export from.
            # NOTE(review): svn_copy, copyfrom_path and copyfrom_rev are assigned
            # below but never read again inside this function.
            svn_copy = False
            # Handle cases where this "add" was a copy from another URL in the source repo
            if d['copyfrom_revision']:
                copyfrom_path = d['copyfrom_path']
                copyfrom_rev = d['copyfrom_revision']
                skip_paths = []
                for tmp_d in log_entry['changed_paths']:
                    tmp_path = tmp_d['path']
                    if is_child_path(tmp_path, path):
                        # Build list of child entries which are also in the changed_paths list,
                        # so that do_svn_add() can skip processing these entries when recursing
                        # since we'll end-up processing them later.
                        # (is_child_path() is also true for 'path' itself, so it is included.)
                        tmp_path_offset = tmp_path[len(source_base):].strip("/")
                        skip_paths.append(tmp_path_offset)
                do_svn_add(path_offset, source_rev, "", "", export_paths, path_is_dir, skip_paths, prefix+" ")
            # Else just "svn export" the files from the source repo and "svn add" them.
            else:
                # Create (parent) directory if needed
                p_path = path_offset if path_is_dir else os.path.dirname(path_offset).strip() or '.'
                if not os.path.exists(p_path):
                    run_svn(["mkdir", p_path])
                # Export the entire added tree.
                if path_is_dir:
                    # For directories, defer the (recursive) "svn export". Might have a
                    # situation in a branch merge where the entry in the svn-log is a
                    # non-copy-from'd "add" but there are child contents (that we haven't
                    # gotten to yet in log_entry) that are copy-from's. When we try do
                    # the "svn copy" later on in do_svn_add() for those copy-from'd paths,
                    # having pre-existing (svn-add'd) contents creates some trouble.
                    # Instead, just create the stub folders ("svn mkdir" above) and defer
                    # exporting the final file-state until the end.
                    add_path(export_paths, path_offset)
                else:
                    # Export the final version of this file. We *need* to do this before running
                    # the "svn add", even if we end-up re-exporting this file again via export_paths.
                    run_svn(["export", "--force", "-r", source_rev,
                             source_url+"/"+path_offset+"@"+str(source_rev), path_offset])
                if not in_svn(path_offset, prefix=prefix+" "):
                    # Need to use in_svn here to handle cases where client committed the parent
                    # folder and each indiv sub-folder.
                    run_svn(["add", "--parents", path_offset])
                if options.keep_prop:
                    sync_svn_props(source_url, source_rev, path_offset)

        elif action == 'D':
            run_svn(["remove", "--force", path_offset])

        elif action == 'M':
            # Only files get re-exported; for folders a modify only leads to
            # the optional property sync below.
            if path_is_file:
                run_svn(["export", "--force", "-N" , "-r", source_rev,
                         source_url+"/"+path_offset+"@"+str(source_rev), path_offset])
            if options.keep_prop:
                sync_svn_props(source_url, source_rev, path_offset)

        else:
            raise InternalError("Internal Error: process_svn_log_entry: Unhandled 'action' value: '%s'"
                % action)

    # Export the final version of all add'd paths from source_url
    if export_paths:
        for path_offset in export_paths:
            run_svn(["export", "--force", "-r", source_rev,
                     source_url+"/"+path_offset+"@"+str(source_rev), path_offset])
622
def keep_revnum(source_rev, target_rev_last, wc_target_tmp):
    """
    Commit "padding" revisions to the target repo until its last revision
    is exactly one below 'source_rev', so that source and target revision
    #'s stay identical.

    'source_rev' is the source revision about to be replayed.
    'target_rev_last' is the last revision currently in the target repo.
    'wc_target_tmp' is the path of a scratch working copy; it is deleted
        (if present), checked out with --depth=empty, and deleted again
        once the padding commits are done.

    Returns the last target revision: the final padding revision when any
    were committed, else 'target_rev_last' unchanged.
    Raises InternalError when 'source_rev' is not above 'target_rev_last'.
    """
    source_num = int(source_rev)
    if source_num <= int(target_rev_last):
        raise InternalError("keep-revnum mode is enabled, "
            "but source revision (r%s) is less-than-or-equal last target revision (r%s)" %
            (source_rev, target_rev_last))

    first_padding_rev = int(target_rev_last) + 1
    if first_padding_rev >= source_num:
        # Target is already exactly one revision behind; nothing to pad.
        return target_rev_last

    # Start from a clean scratch working copy.
    if os.path.exists(wc_target_tmp):
        shutil.rmtree(wc_target_tmp)
    # NOTE(review): target_repos_url is read as a module-level name; it is not
    # assigned in the part of the file visible here -- confirm it is set elsewhere.
    run_svn(["checkout", "-r", "HEAD", "--depth=empty", target_repos_url, wc_target_tmp])
    for padding_rev in range(first_padding_rev, source_num):
        # Changing this property gives each padding commit something to commit.
        run_svn(["propset", "svn2svn:keep-revnum", padding_rev, wc_target_tmp])
        commit_output = run_svn(["commit", "-m", "", wc_target_tmp])
        committed_rev = parse_svn_commit_rev(commit_output) if commit_output else None
        assert padding_rev == committed_rev
        ui.status("Committed revision %s (keep-revnum).", padding_rev)
        target_rev_last = padding_rev
    shutil.rmtree(wc_target_tmp)
    # Update our target working-copy, to ensure everything says it's at the new HEAD revision
    run_svn(["update"])
    return target_rev_last
648
def disp_svn_log_summary(log_entry):
    """
    Display a short summary of a single log entry: a separator line, a
    "r<revision> | <author> | <date>" header line, and then the log message.

    log_entry is indexed by 'revision', 'author', 'date' and 'message'.
    """
    separator = "------------------------------------------------------------------------"
    ui.status(separator)
    revision = log_entry['revision']
    author = log_entry['author']
    # 'date' is converted with int() and datetime.fromtimestamp(), i.e. it is
    # handled as an epoch timestamp and shown as "YYYY-MM-DD HH:MM:SS".
    commit_time = datetime.fromtimestamp(int(log_entry['date']))
    ui.status("r%s | %s | %s", revision, author, str(commit_time.isoformat(' ')))
    ui.status(log_entry['message'])
656
def real_main(args, parser):
    """
    Replay history from the source URL into the target URL.

    Parameters:
      args   -- list holding [source_url, target_url]; both items are popped
      parser -- the optparse parser, used to report usage errors through
                parser.error()

    Reads the module-level 'options' and sets the module-level source_url,
    target_url, source_repos_url, source_base, source_repos_uuid and
    target_repos_url variables.

    Work is done in a '_wc_target' working-copy created under the current
    directory; the process chdir()'s into it. Unless continue-mode is on, the
    first matching source revision is exported and committed as an initial
    import. After that, every later source revision up to the end revision is
    replayed and committed.

    Errors raised while replaying revisions are printed and the working-copy
    is cleaned-up and reverted; they are not re-raised.
    """
    global source_url, target_url, rev_map
    global source_repos_url, source_base, source_repos_uuid
    # keep_revnum() reads target_repos_url, so it must be module-level
    # rather than a local of this function.
    global target_repos_url
    source_url = args.pop(0).rstrip("/")    # e.g. 'http://server/svn/source/trunk'
    target_url = args.pop(0).rstrip("/")    # e.g. 'file:///svn/target/trunk'
    ui.status("options: %s", str(options), level=ui.DEBUG, color='GREEN')

    # Make sure that both the source and target URL's are valid
    source_info = svnclient.get_svn_info(source_url)
    assert is_child_path(source_url, source_info['repos_url'])
    target_info = svnclient.get_svn_info(target_url)
    assert is_child_path(target_url, target_info['repos_url'])

    # Init global vars
    source_repos_url = source_info['repos_url']         # e.g. 'http://server/svn/source'
    source_base = source_url[len(source_repos_url):]    # e.g. '/trunk'
    source_repos_uuid = source_info['repos_uuid']

    # Init start and end revision
    try:
        source_start_rev = svnclient.get_svn_rev(source_repos_url, options.rev_start if options.rev_start else 1)
    except ExternalCommandFailed:
        parser.error("invalid start source revision value: %s" % (options.rev_start))
    try:
        source_end_rev = svnclient.get_svn_rev(source_repos_url, options.rev_end if options.rev_end else "HEAD")
    except ExternalCommandFailed:
        parser.error("invalid end source revision value: %s" % (options.rev_end))
    ui.status("Using source revision range %s:%s", source_start_rev, source_end_rev, level=ui.VERBOSE)

    # TODO: If options.keep_date, should we try doing a "svn propset" on an *existing* revision
    #       as a sanity check, so we check if the pre-revprop-change hook script is correctly setup
    #       before doing first replay-commit?

    target_rev_last = target_info['revision']   # Last revision # in the target repo
    target_repos_url = target_info['repos_url']
    wc_target = os.path.abspath('_wc_target')
    wc_target_tmp = os.path.abspath('_tmp_wc_target')
    num_entries_proc = 0
    commit_count = 0
    source_rev = None
    target_rev = None

    # Check out a working copy of target_url if needed
    wc_exists = os.path.exists(wc_target)
    if wc_exists and not options.cont_from_break:
        shutil.rmtree(wc_target)
        wc_exists = False
    if not wc_exists:
        ui.status("Checking-out _wc_target...", level=ui.VERBOSE)
        svnclient.svn_checkout(target_url, wc_target)
    os.chdir(wc_target)

    if not options.cont_from_break:
        # TODO: Warn user if trying to start (non-continue) into a non-empty target path?
        # Get the first log entry at/after source_start_rev, which is where
        # we'll do the initial import from.
        it_log_start = svnclient.iter_svn_log_entries(source_url, source_start_rev, source_end_rev, get_changed_paths=False)
        # Pre-initialize, so that an empty iterator leads to the InternalError
        # below rather than to an unbound-variable error.
        source_start_log = None
        for source_start_log in it_log_start:
            break
        if not source_start_log:
            raise InternalError("Unable to find any matching revisions between %s:%s in source_url: %s" % \
                (source_start_rev, source_end_rev, source_url))

        # This is the revision we will start from for source_url
        source_start_rev = source_rev = int(source_start_log['revision'])
        ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE)
        ui.status("")
        if options.keep_revnum:
            target_rev_last = keep_revnum(source_rev, target_rev_last, wc_target_tmp)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_url, source_rev, source_rev))
        # Export and add file-contents from source_url@source_start_rev
        top_paths = run_svn(["list", "-r", source_rev, source_url+"@"+str(source_rev)])
        top_paths = top_paths.strip("\n").split("\n")
        for path in top_paths:
            # For each top-level file/folder...
            if not path:
                continue
            # Directories have a trailing slash in the "svn list" output
            path_is_dir = path.endswith("/")
            path_offset = path.rstrip('/') if path_is_dir else path
            if in_svn(path_offset, prefix=" "):
                raise InternalError("Cannot replay history on top of pre-existing structure: %s" % (source_url+"/"+path_offset))
            if path_is_dir and not os.path.exists(path_offset):
                os.makedirs(path_offset)
            run_svn(["export", "--force", "-r", source_rev, source_url+"/"+path_offset+"@"+str(source_rev), path_offset])
            run_svn(["add", path_offset])
        # Update any properties on the newly added content
        paths = run_svn(["list", "--recursive", "-r", source_rev, source_url+"@"+str(source_rev)])
        paths = paths.strip("\n").split("\n")
        if options.keep_prop:
            sync_svn_props(source_url, source_rev, "")
        for path in paths:
            if not path:
                continue
            # Directories have a trailing slash in the "svn list" output
            path_is_dir = path.endswith("/")
            path_offset = path.rstrip('/') if path_is_dir else path
            ui.status(" A %s", source_base+"/"+path_offset, level=ui.VERBOSE)
            if options.keep_prop:
                sync_svn_props(source_url, source_rev, path_offset)
        # Commit the initial import
        num_entries_proc += 1
        target_revprops = gen_tracking_revprops(source_rev)   # Build source-tracking revprop's
        target_rev = commit_from_svn_log_entry(source_start_log, target_revprops=target_revprops)
        if target_rev:
            # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
            set_rev_map(source_rev, target_rev)
            # Update our target working-copy, to ensure everything says it's at the new HEAD revision
            run_svn(["update"])
            commit_count += 1
            target_rev_last = target_rev
    else:
        # Re-build the rev_map based on any already-replayed history in target_url
        build_rev_map(target_url, target_rev_last, source_info)
        if not rev_map:
            parser.error("called with continue-mode, but no already-replayed source history found in target_url")
        # Continue from the source rev which maps to the highest target rev
        source_start_rev = int(max(rev_map, key=rev_map.get))
        assert source_start_rev
        ui.status("Continuing from source revision %s.", source_start_rev, level=ui.VERBOSE)
        ui.status("")

    # Compare as integers, like keep_revnum() does.
    # NOTE(review): target_info['revision'] may not be an int -- confirm
    # against svnclient.get_svn_info().
    if options.keep_revnum and source_start_rev < int(target_rev_last):
        parser.error("last target revision is equal-or-higher than starting source revision; "
                     "cannot use --keep-revnum mode")

    # Compare the client version as a (major, minor) tuple of ints. Parsing
    # "major.minor" as a float would rank SVN 1.10+ (1.10 -> 1.1) below 1.7.
    svn_vers_t = svnclient.get_svn_client_version()
    svn_is_17_plus = tuple(int(part) for part in svn_vers_t[0:2]) >= (1, 7)

    # Load SVN log starting from source_start_rev + 1
    it_log_entries = svnclient.iter_svn_log_entries(source_url, source_start_rev+1, source_end_rev, get_revprops=True) if source_start_rev < source_end_rev else []
    source_rev = None

    # TODO: Now that commit_from_svn_log_entry() might try to do a "svn propset svn:date",
    #       we might want some better KeyboardInterrupt handling here, to ensure that
    #       commit_from_svn_log_entry() always runs as an atomic unit.
    try:
        for log_entry in it_log_entries:
            if options.entries_proc_limit:
                if num_entries_proc >= options.entries_proc_limit:
                    break
            # Replay this revision from source_url into target_url
            source_rev = log_entry['revision']
            if options.keep_revnum:
                target_rev_last = keep_revnum(source_rev, target_rev_last, wc_target_tmp)
            disp_svn_log_summary(log_entry)
            # Process all the changed-paths in this log entry
            commit_paths = []
            process_svn_log_entry(log_entry, commit_paths)
            num_entries_proc += 1
            # Commit any changes made to _wc_target
            target_revprops = gen_tracking_revprops(source_rev)   # Build source-tracking revprop's
            target_rev = commit_from_svn_log_entry(log_entry, commit_paths, target_revprops=target_revprops)
            if target_rev:
                # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
                source_rev = log_entry['revision']
                set_rev_map(source_rev, target_rev)
                target_rev_last = target_rev
                # Update our target working-copy, to ensure everything says it's at the new HEAD revision
                run_svn(["update"])
                commit_count += 1
                # Run "svn cleanup" every 100 commits if SVN 1.7+, to clean-up orphaned ".svn/pristines/*"
                if svn_is_17_plus and (commit_count % 100 == 0):
                    run_svn(["cleanup"])
        if not source_rev:
            # If there were no new source_url revisions to process, init source_rev
            # for the "finally" message below to be the last source revision replayed.
            source_rev = source_start_rev

    except KeyboardInterrupt:
        print("\nStopped by user.")
        print("\nCleaning-up...")
        run_svn(["cleanup"])
        full_svn_revert()
    except:
        # Deliberately catch everything here, so that the working-copy is always
        # cleaned-up and reverted after a failed replay.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        print("\nCleaning-up...")
        run_svn(["cleanup"])
        print(run_svn(["status"]))
        full_svn_revert()
    finally:
        print("\nFinished at source revision %s%s." % (source_rev, " (dry-run)" if options.dry_run else ""))
842
def main():
    """
    Command-line entry point. Must be callable without arguments.

    Parses the command-line into the module-level 'options', normalizes the
    parsed values (verbosity level, dry-run limit, --revision range,
    --archive expansion), hands the options to ui.update_config() and then
    runs real_main() with the two positional arguments.

    Returns whatever real_main() returns. Usage errors are reported through
    parser.error().
    """
    usage = "svn2svn, version %s\n" % str(full_version) + \
            "<http://nynim.org/projects/svn2svn> <https://github.com/tonyduckles/svn2svn>\n\n" + \
            "Usage: %prog [OPTIONS] source_url target_url\n"
    description = """\
Replicate (replay) history from one SVN repository to another. Maintain
logical ancestry wherever possible, so that 'svn log' on the replayed repo
will correctly follow file/folder renames.

Examples:
Create a copy of only /trunk from source repo, starting at r5000
$ svnadmin create /svn/target
$ svn mkdir -m 'Add trunk' file:///svn/target/trunk
$ svn2svn -av -r 5000 http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target
2. The first commit to http://server/source/trunk at/after r5000 will be
exported & added into _wc_target
3. All revisions affecting http://server/source/trunk (starting at r5000)
will be replayed to _wc_target. Any add/copy/move/replaces that are
copy-from'd some path outside of /trunk (e.g. files renamed on a
/branch and branch was merged into /trunk) will correctly maintain
logical ancestry where possible.

Use continue-mode (-c) to pick-up where the last run left-off
$ svn2svn -avc http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target, if not already
checked-out
2. All new revisions affecting http://server/source/trunk starting from
the last replayed revision to file:///svn/target/trunk (based on the
svn2svn:* revprops) will be replayed to _wc_target, maintaining all
logical ancestry where possible."""
    parser = optparse.OptionParser(usage, description=description,
                formatter=HelpFormatter(), version="%prog "+str(full_version))
    parser.add_option("-v", "--verbose", dest="verbosity", action="count", default=1,
                      help="enable additional output (use -vv or -vvv for more)")
    parser.add_option("-a", "--archive", action="store_true", dest="archive", default=False,
                      help="archive/mirror mode; same as -UDP (see REQUIRE's below)\n"
                           "maintain same commit author, same commit time, and file/dir properties")
    parser.add_option("-U", "--keep-author", action="store_true", dest="keep_author", default=False,
                      help="maintain same commit authors (svn:author) as source\n"
                           "(REQUIRES target_url be non-auth'd, e.g. file://-based, since this uses --username to set author)")
    parser.add_option("-D", "--keep-date", action="store_true", dest="keep_date", default=False,
                      help="maintain same commit time (svn:date) as source\n"
                           "(REQUIRES 'pre-revprop-change' hook script to allow 'svn:date' changes)")
    parser.add_option("-P", "--keep-prop", action="store_true", dest="keep_prop", default=False,
                      help="maintain same file/dir SVN properties as source")
    parser.add_option("-R", "--keep-revnum", action="store_true", dest="keep_revnum", default=False,
                      help="maintain same rev #'s as source. creates placeholder target "
                           "revisions (by modifying a 'svn2svn:keep-revnum' property at the root of the target repo)")
    parser.add_option("-c", "--continue", action="store_true", dest="cont_from_break",
                      help="continue from last source commit to target (based on svn2svn:* revprops)")
    parser.add_option("-r", "--revision", type="string", dest="revision", metavar="ARG",
                      help="revision range to replay from source_url\n"
                           "A revision argument can be one of:\n"
                           " START start rev # (end will be 'HEAD')\n"
                           " START:END start and ending rev #'s\n"
                           "Any revision # formats which SVN understands are "
                           "supported, e.g. 'HEAD', '{2010-01-31}', etc.")
    parser.add_option("-u", "--log-author", action="store_true", dest="log_author", default=False,
                      help="append source commit author to replayed commit messages")
    parser.add_option("-d", "--log-date", action="store_true", dest="log_date", default=False,
                      help="append source commit time to replayed commit messages")
    parser.add_option("-l", "--limit", type="int", dest="entries_proc_limit", metavar="NUM",
                      help="maximum number of source revisions to process")
    parser.add_option("-n", "--dry-run", action="store_true", dest="dry_run", default=False,
                      help="process next source revision but don't commit changes to "
                           "target working-copy (forces --limit=1)")
    parser.add_option("--debug", dest="verbosity", const=ui.DEBUG, action="store_const",
                      help="enable debugging output (same as -vvv)")
    global options
    options, args = parser.parse_args()
    if len(args) != 2:
        parser.error("incorrect number of arguments")
    if options.verbosity < 10:
        # Expand multiple "-v" arguments to a real ui._level value
        options.verbosity *= 10
    if options.dry_run:
        # When in dry-run mode, only try to process the next log_entry
        options.entries_proc_limit = 1
    # rev_start/rev_end stay None unless a --revision argument supplies them
    options.rev_start = None
    options.rev_end = None
    if options.revision:
        # Reg-ex for matching a revision arg (http://svnbook.red-bean.com/en/1.5/svn.tour.revs.specifiers.html#svn.tour.revs.dates)
        rev_patt = '[0-9A-Z]+|\{[0-9A-Za-z/\\ :-]+\}'
        match = re.match('^('+rev_patt+'):('+rev_patt+')$', options.revision)  # First try start:end match
        if match is None:
            match = re.match('^('+rev_patt+')$', options.revision)             # Next, try start match
        if match is None:
            parser.error("unexpected --revision argument format; see 'svn help log' for valid revision formats")
        # One captured group for "START", two for "START:END"
        rev = match.groups()
        options.rev_start = rev[0] if len(rev)>0 else None
        options.rev_end = rev[1] if len(rev)>1 else None
    if options.archive:
        # --archive is shorthand for -U -D -P
        options.keep_author = True
        options.keep_date = True
        options.keep_prop = True
    ui.update_config(options)
    return real_main(args, parser)
942
943
if __name__ == "__main__":
    # Use main()'s result as the process exit status; any falsy result
    # (e.g. None) is reported as 0.
    exit_status = main()
    if not exit_status:
        exit_status = 0
    sys.exit(exit_status)