]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/run/svn2svn.py
* Add new --dry-run and --limit command-line args
[svn2svn.git] / svn2svn / run / svn2svn.py
1 """
2 Replicate (replay) changesets from one SVN repository to another:
3 * Maintains full logical history (e.g. uses "svn copy" for renames).
4 * Maintains original commit messages.
5 * Optionally maintain source author info. (Only supported if accessing
6 target SVN repo via file://)
7 * Cannot maintain original commit date, but appends original commit date
8 for each commit message: "Date: %d".
9 * Optionally run an external shell script before each replayed commit
10 to give the ability to dynamically exclude or modify files as part
11 of the replay.
12
13 License: GPLv3, same as hgsvn (https://bitbucket.org/andialbrecht/hgsvn)
14 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
15 (Inspired by http://code.google.com/p/svn2svn/, and uses code from hgsvn
16 for SVN client handling)
17 """
18
19 from .. import base_version, full_version
20 from .. import ui
21 from .. import svnclient
22 from ..shell import run_svn
23 from ..errors import (ExternalCommandFailed, UnsupportedSVNAction, InternalError, VerificationError)
24
25 import sys
26 import os
27 import time
28 import traceback
29 import shutil
30 import operator
31 from optparse import OptionParser,OptionGroup
32 from datetime import datetime
33
# The known SVN action abbreviations as reported by "svn log":
# M(odified), A(dded), R(eplaced), D(eleted). Stored as one string, so
# membership tests against it are substring tests.
_valid_svn_actions = "MARD"
35
def commit_from_svn_log_entry(log_entry, options, commit_paths=None, target_revprops=None):
    """
    Run "svn commit" in the current working copy for one replayed log entry.

    The commit message is the source message with a "Date: ..." line appended.
    When options.keep_author is set the source author is passed via
    --username; otherwise an "Author: ..." line is appended to the message.
    Revprops from log_entry['revprops'], then from target_revprops, are
    attached with --with-revprop (later entries win on name clashes).

    Returns the revision string scraped from the "Committed revision" output
    line, "" when output was produced but no such line was found, or None when
    options.dry_run is set or the command produced no output.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # Show the working-copy "svn status" when running in -vv (or higher) mode.
    if ui.get_level() >= ui.EXTRA:
        ui.status(">> commit_from_svn_log_entry: Pre-commit _wc_target status:", level=ui.EXTRA, color='CYAN')
        ui.status(run_svn(["status"]), level=ui.EXTRA, color='CYAN')

    # Rendered in the local timezone. (For UTC commit times one could use
    # "%d 0" % int(log_entry['date']) instead.)
    svn_date = str(datetime.fromtimestamp(int(log_entry['date'])))
    date_line = "\nDate: " + svn_date

    commit_args = ["commit", "--force-log"]
    if options.keep_author:
        commit_args.extend(["-m", log_entry['message'] + date_line,
                            "--username", log_entry['author']])
    else:
        commit_args.extend(["-m", log_entry['message'] + date_line + "\nAuthor: " + log_entry['author']])

    # Source revprops first, then the extra target-side revprops on top.
    merged_revprops = {}
    for prop in (log_entry['revprops'] or []):
        merged_revprops[prop['name']] = prop['value']
    for prop in (target_revprops or []):
        merged_revprops[prop['name']] = prop['value']
    for name in merged_revprops:
        commit_args.extend(["--with-revprop", "%s=%s" % (name, str(merged_revprops[name]))])

    # Only pass explicit paths when the list is reasonably short; otherwise
    # pass nothing so the commit happens at the root of the working copy.
    if commit_paths and len(commit_paths) < 100:
        commit_args.extend(commit_paths)

    if options.dry_run:
        return None

    # Run the "svn commit" command and screen-scrape the new revision number.
    output = run_svn(commit_args)
    if not output:
        return None
    marker = 'Committed revision '
    rev = ""
    for line in output.strip("\n").split("\n"):
        if line.startswith(marker):
            rev = line[len(marker):].rstrip('.')
            break
    if rev:
        ui.status("Committed revision %s.", rev)
    return rev
87
def full_svn_revert():
    """
    Do an "svn revert" and proactively remove any extra files in the working copy.

    After reverting recursively from the current directory, every path that
    "svn status" reports as unversioned (status column "?") is deleted from
    disk: files and symlinks are unlinked, directories are removed
    recursively.
    """
    run_svn(["revert", "--recursive", "."])
    output = run_svn(["status"])
    if not output:
        return
    for line in output.strip("\n").split("\n"):
        # startswith() also copes with blank lines, where line[0] would raise.
        if not line.startswith("?"):
            continue
        path = line[4:].strip(" ")
        # Check for symlinks first: shutil.rmtree() refuses to operate on a
        # symlink, and a broken link is neither a file nor a directory.
        if os.path.islink(path) or os.path.isfile(path):
            os.remove(path)
        elif os.path.isdir(path):
            shutil.rmtree(path)
103
def gen_tracking_revprops(source_repos_uuid, source_url, source_rev):
    """
    Build the list of svn2svn-specific source-tracking revprops.

    Returns three {'name': ..., 'value': ...} dicts, in this order:
    svn2svn:source_uuid, svn2svn:source_url, svn2svn:source_rev.
    """
    tracked = (
        ('svn2svn:source_uuid', source_repos_uuid),
        ('svn2svn:source_url', source_url),
        ('svn2svn:source_rev', source_rev),
    )
    return [{'name': prop_name, 'value': prop_value} for prop_name, prop_value in tracked]
112
def in_svn(p, require_in_repo=False, prefix=""):
    """
    Report whether the file/folder 'p' is tracked by Subversion.

    Prior to SVN 1.6 one could "cheat" and look for ".svn" directories, but
    with SVN 1.7 (WC-NG) there is only a single top-level ".svn" at the root
    of the working copy, so the answer is derived from "svn status" instead.

    Returns False when no status entry is returned for 'p'. With
    'require_in_repo' set, paths that are only locally added (or have no
    revision) also yield False. 'prefix' is prepended to the debug message.
    """
    entries = svnclient.get_svn_status(p, no_recursive=True)
    if not entries:
        return False
    entry = entries[0]
    status = entry['status']
    if require_in_repo and (status == 'added' or entry['revision'] is None):
        # Caller needs the path to exist in the repository itself, so a
        # locally-added path does not count.
        tracked = False
    elif status == 'deleted':
        # Paths scheduled for deletion in the WC are not considered tracked.
        tracked = False
    else:
        # Normal entries and locally added/copied entries are tracked.
        tracked = (entry['type'] == 'normal' or status == 'added' or entry['copied'] == 'true')
    ui.status(prefix + ">> in_svn('%s', require_in_repo=%s) --> %s", p, str(require_in_repo), str(tracked), level=ui.DEBUG, color='GREEN')
    return tracked
134
def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix = ""):
    """
    Given a source path, walk the SVN history backwards to inspect the ancestry of
    that path, seeing if it traces back to base_path. Build an array of copyfrom_path
    and copyfrom_revision pairs for each of the "svn copies". If we find a copyfrom_path
    which base_path is a substring match of (e.g. we crawled back to the initial branch-
    copy from trunk), then return the collection of ancestor paths. Otherwise,
    copyfrom_path has no ancestry compared to base_path.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'svn_repos_url' is the full URL to the root of the SVN repository,
      e.g. 'file:///path/to/repo'
    'base_path'     is the path in the SVN repo to the target path we're trying to
      trace ancestry back to, e.g. 'trunk'.
    'source_path'   is the path in the SVN repo to the source path to start checking
      ancestry at, e.g. 'branches/fix1/projectA/file1.txt'.
      (full_path = svn_repos_url+base_path+"/"+path_offset)
    'source_rev'    is the revision to start walking the history of source_path backwards from.
    'prefix'        is prepended to every debug status message.

    Returns a list of {'path': ..., 'revision': ...} dicts. The first element is the
    starting path (base_path+"/"+source_path @ source_rev) and each following element
    is the location reached after following one more copy-from. The list is empty
    when no copy-from chain was recorded.

    Raises UnsupportedSVNAction when a changed-path action is not one of
    _valid_svn_actions.
    """
    ui.status(prefix + ">> find_svn_ancestors: Start: (%s) source_path: %s base_path: %s",
        svn_repos_url, source_path+"@"+str(source_rev), base_path, level=ui.DEBUG, color='YELLOW')
    done = False
    working_path = base_path+"/"+source_path
    working_rev  = source_rev
    first_iter_done = False
    # Copy-from hops collected while walking backwards; reset to [] whenever the
    # walk hits a delete or a plain (non-copy) add/replace.
    ancestors_temp = []
    while not done:
        # Get the first "svn log" entry for this path (relative to @rev)
        ui.status(prefix + ">> find_svn_ancestors: %s", svn_repos_url + working_path+"@"+str(working_rev), level=ui.DEBUG, color='YELLOW')
        log_entry = svnclient.get_first_svn_log_entry(svn_repos_url + working_path+"@"+str(working_rev), 1, working_rev, True)
        if not log_entry:
            ui.status(prefix + ">> find_svn_ancestors: Done: no log_entry", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        # If we found a copy-from case which matches our base_path, we're done.
        # ...but only if we've at least tried to search for the first copy-from path.
        if first_iter_done and working_path.startswith(base_path):
            ui.status(prefix + ">> find_svn_ancestors: Done: Found working_path.startswith(base_path) and first_iter_done=True", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        first_iter_done = True
        # Search for any actions on our target path (or parent paths).
        # NOTE(review): "path in working_path" is a plain substring test, so it can
        # also match paths that are not true parents of working_path -- confirm
        # whether that is intended.
        changed_paths_temp = []
        for d in log_entry['changed_paths']:
            path = d['path']
            if path in working_path:
                changed_paths_temp.append({'path': path, 'data': d})
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the chain and this path has no ancestry back to base_path.
            ui.status(prefix + ">> find_svn_ancestors: Done: No matching changed_paths", level=ui.DEBUG, color='YELLOW')
            done = True
            continue
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
        # Find the action for our working_path in this revision. Use a loop to check in reverse order,
        # so that if the target file/folder is "M" but has a parent folder with an "A" copy-from.
        # NOTE(review): if every matched entry is "M", neither 'done' nor working_path
        # changes in this pass; presumably the first log entry of a path always
        # contains an add -- verify against svnclient.get_first_svn_log_entry.
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            action = d['action']
            if action not in _valid_svn_actions:
                raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                    % (log_entry['revision'], action))
            ui.status(prefix + "> %s %s%s", action, path,
                (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                level=ui.DEBUG, color='YELLOW')
            if action == 'D':
                # If file/folder was deleted, it has no ancestor
                ancestors_temp = []
                ui.status(prefix + ">> find_svn_ancestors: Done: deleted", level=ui.DEBUG, color='YELLOW')
                done = True
                break
            if action in 'RA':
                # If file/folder was added/replaced but not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    ancestors_temp = []
                    ui.status(prefix + ">> find_svn_ancestors: Done: %s with no copyfrom_path",
                        "Added" if action == "A" else "Replaced",
                        level=ui.DEBUG, color='YELLOW')
                    done = True
                    break
                # Else, file/folder was added/replaced and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                ui.status(prefix + ">> find_svn_ancestors: Found copy-from (action=%s): %s --> %s",
                    action, path, d['copyfrom_path']+"@"+str(d['copyfrom_revision']),
                    level=ui.DEBUG, color='YELLOW')
                ancestors_temp.append({'path': path, 'revision': log_entry['revision'],
                    'copyfrom_path': d['copyfrom_path'], 'copyfrom_rev': d['copyfrom_revision']})
                # Re-root the working path onto the copy source and continue the
                # walk from the copy-from revision.
                working_path = working_path.replace(d['path'], d['copyfrom_path'])
                working_rev = d['copyfrom_revision']
                # Follow the copy and keep on searching
                break
    # Convert the collected hops into the returned path@revision chain by
    # replaying the same path substitutions from the starting path.
    ancestors = []
    if ancestors_temp:
        ancestors.append({'path': base_path+"/"+source_path, 'revision': source_rev})
        working_path = base_path+"/"+source_path
        for idx in range(len(ancestors_temp)):
            d = ancestors_temp[idx]
            working_path = working_path.replace(d['path'], d['copyfrom_path'])
            working_rev = d['copyfrom_rev']
            ancestors.append({'path': working_path, 'revision': working_rev})
        if ui.get_level() >= ui.DEBUG:
            # Pad every "path@rev" label to the widest one so the debug table lines up.
            max_len = 0
            for idx in range(len(ancestors)):
                d = ancestors[idx]
                max_len = max(max_len, len(d['path']+"@"+str(d['revision'])))
            ui.status(prefix + ">> find_svn_ancestors: Found parent ancestors:", level=ui.DEBUG, color='YELLOW_B')
            for idx in range(len(ancestors)-1):
                d = ancestors[idx]
                d_next = ancestors[idx+1]
                ui.status(prefix + " [%s] %s <-- %s", idx,
                    str(d['path']+"@"+str(d['revision'])).ljust(max_len),
                    str(d_next['path']+"@"+str(d_next['revision'])).ljust(max_len),
                    level=ui.DEBUG, color='YELLOW')
    else:
        ui.status(prefix + ">> find_svn_ancestors: No ancestor-chain found: %s",
            svn_repos_url+base_path+"/"+source_path+"@"+str(source_rev), level=ui.DEBUG, color='YELLOW')
    return ancestors
256
def get_rev_map(rev_map, source_rev, prefix):
    """
    Look up the target-repo revision equivalent to a source-repo revision.

    Scans downward from int(source_rev) to 1 and returns, as an int, the
    mapped value of the highest key in 'rev_map' that is less than or equal
    to source_rev. Returns None when no such key exists. 'prefix' is
    prepended to the debug status messages.
    """
    ui.status(prefix + ">> get_rev_map(%s)", source_rev, level=ui.DEBUG, color='GREEN')
    candidate = int(source_rev)
    while candidate > 0:
        is_mapped = candidate in rev_map
        ui.status(prefix + ">> get_rev_map: rev=%s in_rev_map=%s", candidate, str(is_mapped), level=ui.DEBUG, color='BLACK_B')
        if is_mapped:
            return int(rev_map[candidate])
        candidate -= 1
    # Fell off the bottom of the rev_map: nothing at or below source_rev.
    return None
269
def set_rev_map(rev_map, source_rev, target_rev):
    """
    Record in 'rev_map' that source revision 'source_rev' maps to target
    revision 'target_rev'. Both are stored as ints.
    """
    ui.status(">> set_rev_map: source_rev=%s target_rev=%s", source_rev, target_rev, level=ui.DEBUG, color='GREEN')
    mapped_rev = int(target_rev)
    rev_map[int(source_rev)] = mapped_rev
273
def build_rev_map(target_url, source_info):
    """
    Check for any already-replayed history from source_url (source_info) and
    build the mapping-table of source_rev -> target_rev.

    Walks the log of 'target_url' from revision 1 to HEAD and, for every
    revision whose svn2svn:source_uuid and svn2svn:source_url revprops equal
    source_info['repos_uuid'] and source_info['url'], records
    svn2svn:source_rev -> that target revision. Revisions that lack any of
    the three tracking revprops are skipped.

    Returns the resulting dict (int source rev -> int target rev).
    """
    tracking_names = ('svn2svn:source_uuid', 'svn2svn:source_url', 'svn2svn:source_rev')
    rev_map = {}
    ui.status("Rebuilding rev_map...", level=ui.VERBOSE)
    it_log_entries = svnclient.iter_svn_log_entries(target_url, 1, 'HEAD', get_changed_paths=False, get_revprops=True)
    for log_entry in it_log_entries:
        if not log_entry['revprops']:
            continue
        revprops = dict((v['name'], v['value'])
                        for v in log_entry['revprops']
                        if v['name'].startswith('svn2svn:'))
        # A revision may carry revprops without being an svn2svn commit;
        # skip it instead of failing with a KeyError on the lookups below.
        if not all(name in revprops for name in tracking_names):
            continue
        if revprops['svn2svn:source_uuid'] == source_info['repos_uuid'] and \
           revprops['svn2svn:source_url'] == source_info['url']:
            source_rev = revprops['svn2svn:source_rev']
            target_rev = log_entry['revision']
            set_rev_map(rev_map, source_rev, target_rev)
    return rev_map
295
def get_svn_dirlist(svn_path, svn_rev = ""):
    """
    Return the entries printed by "svn list" for the given path, one list
    item per output line.

    When 'svn_rev' is given, the listing is taken with "-r svn_rev" and the
    path is pegged as "svn_path@svn_rev". The command is run with
    no_fail=True; output of at most one character yields an empty list.
    """
    target = svn_path
    cmd = ["list"]
    if svn_rev:
        cmd.extend(["-r", svn_rev])
        target = target + "@" + str(svn_rev)
    cmd.append(target)
    listing = run_svn(cmd, no_fail=True)
    if len(listing) > 1:
        return listing.strip("\n").split("\n")
    return []
309
def path_in_list(paths, path):
    """
    Report whether 'path' is already covered by an entry of 'paths'.

    An entry covers 'path' when it equals 'path' or is one of its parent
    directories (i.e. 'path' starts with the entry followed by "/"). An
    empty entry stands for the root and covers everything. Matching on
    whole path components keeps a sibling such as "foobar" from being
    treated as covered by "foo".
    """
    return any(
        not p or path == p or path.startswith(p.rstrip("/") + "/")
        for p in paths
    )
315
def add_path(paths, path):
    """
    Append 'path' to the list 'paths' unless path_in_list() reports that it
    is already covered by an existing entry. Mutates 'paths' in place.
    """
    if path_in_list(paths, path):
        return
    paths.append(path)
319
def do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev, \
               parent_copyfrom_path="", parent_copyfrom_rev="", export_paths=None, \
               rev_map=None, is_dir = False, prefix = ""):
    """
    Given the add'd source path, replay the "svn add/copy" commands to correctly
    track renames across copy-from's.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2  (rename folder)
    4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt
       (rename file inside renamed folder)
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'path_offset' is the offset from source_base to the file to check ancestry for,
      e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'target_url' is the full URL to the target path in the target repository.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'parent_copyfrom_path' and 'parent_copyfrom_rev' is the copy-from path of the parent
      directory, when being called recursively by do_svn_add_dir().
    'export_paths' is the list of path_offset's that we've deferred running "svn export" on.
      It is appended to in place; a fresh list is used when omitted.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s. An empty dict is used when omitted.
    'is_dir' is whether path_offset is a directory (rather than a file).
    'prefix' is prepended to the debug status messages.
    """
    # Avoid shared mutable defaults. The old "{}" default for export_paths was
    # also the wrong type: add_path() appends to it, so it must be a list.
    if export_paths is None:
        export_paths = []
    if rev_map is None:
        rev_map = {}
    source_base = source_url[len(source_repos_url):]
    ui.status(prefix + ">> do_svn_add: %s %s", source_base+"/"+path_offset+"@"+str(source_rev),
        " (parent-copyfrom: "+parent_copyfrom_path+"@"+str(parent_copyfrom_rev)+")" if parent_copyfrom_path else "",
        level=ui.DEBUG, color='GREEN')
    # Check if the given path has ancestors which chain back to the current source_base
    found_ancestor = False
    ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset, source_rev, prefix+" ")
    # ancestors[n] is the original (pre-branch-copy) trunk path.
    # ancestors[n-1] is the first commit on the new branch.
    copyfrom_path = ancestors[len(ancestors)-1]['path'] if ancestors else ""
    copyfrom_rev = ancestors[len(ancestors)-1]['revision'] if ancestors else ""
    if ancestors:
        # The copy-from path has ancestry back to source_url.
        ui.status(prefix + ">> do_svn_add: Check copy-from: Found parent: %s", copyfrom_path+"@"+str(copyfrom_rev),
            level=ui.DEBUG, color='GREEN', bold=True)
        found_ancestor = True
        # Map the copyfrom_rev (source repo) to the equivalent target repo rev #. This can
        # return None in the case where copyfrom_rev is *before* our source_start_rev.
        tgt_rev = get_rev_map(rev_map, copyfrom_rev, prefix+" ")
        ui.status(prefix + ">> do_svn_add: get_rev_map: %s (source) -> %s (target)", copyfrom_rev, tgt_rev, level=ui.DEBUG, color='GREEN')
    else:
        ui.status(prefix + ">> do_svn_add: Check copy-from: No ancestor chain found.", level=ui.DEBUG, color='GREEN')
        found_ancestor = False
    # tgt_rev is only bound when ancestors were found; the short-circuit on
    # found_ancestor keeps it from being read otherwise.
    if found_ancestor and tgt_rev:
        # Check if this path_offset in the target WC already has this ancestry, in which
        # case there's no need to run the "svn copy" (again).
        path_in_svn = in_svn(path_offset, prefix=prefix+" ")
        log_entry = svnclient.get_last_svn_log_entry(path_offset, 1, 'HEAD', get_changed_paths=False) if in_svn(path_offset, require_in_repo=True, prefix=prefix+" ") else []
        if (not log_entry or (log_entry['revision'] != tgt_rev)):
            copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
            ui.status(prefix + ">> do_svn_add: svn_copy: Copy-from: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " copyfrom: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " p_copyfrom: %s", parent_copyfrom_path+"@"+str(parent_copyfrom_rev) if parent_copyfrom_path else "", level=ui.DEBUG, color='GREEN')
            if path_in_svn and \
               ((parent_copyfrom_path and copyfrom_path.startswith(parent_copyfrom_path)) and \
                (parent_copyfrom_rev and copyfrom_rev == parent_copyfrom_rev)):
                # When being called recursively, if this child entry has the same ancestor as
                # the parent, then no need to try to run another "svn copy".
                ui.status(prefix + ">> do_svn_add: svn_copy: Same ancestry as parent: %s",
                    parent_copyfrom_path+"@"+str(parent_copyfrom_rev),level=ui.DEBUG, color='GREEN')
            else:
                # Copy this path from the equivalent path+rev in the target repo, to create the
                # equivalent history.
                if parent_copyfrom_path:
                    # If we have a parent copy-from path, we mis-match that so display a status
                    # message describing the action we're mimic'ing. If path_in_svn, then this
                    # is logically a "replace" rather than an "add".
                    ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), source_base+"/"+path_offset, ancestors[1]['path']+"@"+str(copyfrom_rev), level=ui.VERBOSE)
                if path_in_svn:
                    # If local file is already under version-control, then this is a replace.
                    ui.status(prefix + ">> do_svn_add: pre-copy: local path already exists: %s", path_offset, level=ui.DEBUG, color='GREEN')
                    run_svn(["remove", "--force", path_offset])
                run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset])
                # Export the final version of this file/folder from the source repo, to make
                # sure we're up-to-date.
                add_path(export_paths, path_offset)
        else:
            ui.status(prefix + ">> do_svn_add: Skipped 'svn copy': %s", path_offset, level=ui.DEBUG, color='GREEN')
    else:
        # Else, either this copy-from path has no ancestry back to source_url OR copyfrom_rev comes
        # before our initial source_start_rev (i.e. tgt_rev == None), so can't do a "svn copy".
        # Create (parent) directory if needed.
        # TODO: This is (nearly) a duplicate of code in process_svn_log_entry(). Should this be
        #       split-out to a shared tag?
        p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or '.'
        if not os.path.exists(p_path):
            run_svn(["mkdir", p_path])
        if not in_svn(path_offset, prefix=prefix+" "):
            if is_dir:
                # Export the final version of all files in this folder.
                add_path(export_paths, path_offset)
            else:
                # Export the final version of this file. We *need* to do this before running
                # the "svn add", even if we end-up re-exporting this file again via export_paths.
                run_svn(["export", "--force", "-r", source_rev,
                         source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])
            # If not already under version-control, then "svn add" this file/folder.
            run_svn(["add", "--parents", path_offset])
        # TODO: Need to copy SVN properties from source repos
    if is_dir:
        # For any folders that we process, process any child contents, so that we correctly
        # replay copies/replaces/etc.
        do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url,
                       copyfrom_path, copyfrom_rev, export_paths, rev_map, prefix+" ")
437
def do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url, \
                   parent_copyfrom_path, parent_copyfrom_rev, export_paths, rev_map, prefix=""):
    """
    Replay the child contents of an added directory.

    Lists the directory in the local working copy and in the source repo at
    'source_rev'. Every remote child is passed to do_svn_add() (with this
    directory's copy-from as the parent copy-from); every local child that
    is missing from the remote listing is removed with "svn remove --force".
    """
    source_base = source_url[len(source_repos_url):]
    # Directory contents of the local WC (target_url) vs. the remote repo (source_url).
    # TODO: paths_local won't include add'd paths because "svn ls" lists the contents of the
    #       associated remote repo folder. (Is this a problem?)
    paths_local = get_svn_dirlist(path_offset)
    paths_remote = get_svn_dirlist(source_url+"/"+path_offset, source_rev)
    ui.status(prefix + ">> do_svn_add_dir: paths_local: %s", str(paths_local), level=ui.DEBUG, color='GREEN')
    ui.status(prefix + ">> do_svn_add_dir: paths_remote: %s", str(paths_remote), level=ui.DEBUG, color='GREEN')
    # Add/update every child which exists in the remote listing.
    for child in paths_remote:
        # Directories carry a trailing slash in the "svn list" output.
        child_is_dir = (child[-1] == "/")
        child_name = child.rstrip('/') if child_is_dir else child
        child_offset = path_offset + "/" + child_name
        do_svn_add(source_repos_url, source_url, child_offset, target_url, source_rev,
                   parent_copyfrom_path, parent_copyfrom_rev, export_paths,
                   rev_map, child_is_dir, prefix+" ")
    # Remove children which exist locally but not in the remote listing.
    for child in paths_local:
        if child in paths_remote:
            continue
        ui.status(" %s %s", 'D', source_base+"/"+path_offset+"/"+child, level=ui.VERBOSE)
        run_svn(["remove", "--force", path_offset+"/"+child])
    # TODO: Does this handle deleted folders too? Wouldn't want to have a case
    #       where we only delete all files from folder but leave orphaned folder around.
462
def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url, \
                          rev_map, options, commit_paths = None, prefix = ""):
    """
    Process SVN changes from the given log entry.
    Returns array of all the paths in the working-copy that were changed,
    i.e. the paths which need to be "svn commit".

    'log_entry' is the array structure built by parse_svn_log_xml().
    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s.
    'options' is accepted for interface compatibility; it is not read here.
    'commit_paths' is the working list of specific paths which changes to pass
      to the final "svn commit". It is appended to in place and returned; a
      fresh list is created when omitted.
    'prefix' is prepended to the debug status messages.

    Raises UnsupportedSVNAction for an action outside _valid_svn_actions and
    InternalError for an action that passes that check but is not handled.
    """
    # A "[]" default would be shared (and keep growing) across calls.
    if commit_paths is None:
        commit_paths = []
    export_paths = []
    # Get the relative offset of source_url based on source_repos_url
    # e.g. '/branches/bug123'
    source_base = source_url[len(source_repos_url):]
    source_rev = log_entry['revision']
    ui.status(prefix + ">> process_svn_log_entry: %s", source_url+"@"+str(source_rev), level=ui.DEBUG, color='GREEN')
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not path.startswith(source_base + "/"):
            # Ignore changed files that are not part of this subdir
            if path != source_base:
                ui.status(prefix + ">> process_svn_log_entry: Unrelated path: %s (base: %s)", path, source_base, level=ui.DEBUG, color='GREEN')
            continue
        assert len(d['kind'])>0
        path_is_dir = True if d['kind'] == 'dir' else False
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path
        action = d['action']
        if action not in _valid_svn_actions:
            raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                % (source_rev, action))
        ui.status(" %s %s%s", action, d['path'],
            (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
            level=ui.VERBOSE)

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        add_path(commit_paths, path_offset)

        # Special-handling for replace's
        if action == 'R':
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            # Handle cases where this "add" was a copy from another URL in the source repos
            if d['copyfrom_revision']:
                do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev,
                           "", "", export_paths, rev_map, path_is_dir, prefix+" ")
            # Else just "svn export" the files from the source repo and "svn add" them.
            else:
                # Create (parent) directory if needed
                p_path = path_offset if path_is_dir else os.path.dirname(path_offset).strip() or '.'
                if not os.path.exists(p_path):
                    run_svn(["mkdir", p_path])
                # Export the entire added tree.
                if path_is_dir:
                    # For directories, defer the (recursive) "svn export". Might have a
                    # situation in a branch merge where the entry in the svn-log is a
                    # non-copy-from'd "add" but there are child contents (that we haven't
                    # gotten to yet in log_entry) that are copy-from's. When we try do
                    # the "svn copy" later on in do_svn_add() for those copy-from'd paths,
                    # having pre-existing (svn-add'd) contents creates some trouble.
                    # Instead, just create the stub folders ("svn mkdir" above) and defer
                    # exporting the final file-state until the end.
                    add_path(export_paths, path_offset)
                else:
                    # Export the final version of this file. We *need* to do this before running
                    # the "svn add", even if we end-up re-exporting this file again via export_paths.
                    run_svn(["export", "--force", "-r", source_rev,
                             source_url+"/"+path_offset+"@"+str(source_rev), path_offset])
                if not in_svn(path_offset, prefix=prefix+" "):
                    # Need to use in_svn here to handle cases where client committed the parent
                    # folder and each indiv sub-folder.
                    run_svn(["add", "--parents", path_offset])
                # TODO: Need to copy SVN properties from source repos

        elif action == 'D':
            run_svn(["remove", "--force", path_offset])

        elif action == 'M':
            # TODO: Is "svn merge -c" correct here? Should this just be an "svn export" plus
            #       proplist updating?
            run_svn(["merge", "-c", source_rev, "--non-recursive",
                     "--non-interactive", "--accept=theirs-full",
                     source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

        else:
            raise InternalError("Internal Error: process_svn_log_entry: Unhandled 'action' value: '%s'"
                % action)

    # Export the final version of all add'd paths from source_url
    if export_paths:
        for path_offset in export_paths:
            run_svn(["export", "--force", "-r", source_rev,
                     source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

    return commit_paths
583
def disp_svn_log_summary(log_entry):
    """
    Print a summary of one log entry via ui.status(): a blank line, a
    "rREV | author | date" header (date rendered from the entry's timestamp
    with datetime.fromtimestamp), the commit message, and a dashed
    separator line.
    """
    ui.status("")
    revision = log_entry['revision']
    author = log_entry['author']
    committed_at = datetime.fromtimestamp(int(log_entry['date']))
    ui.status("r%s | %s | %s", revision, author, str(committed_at.isoformat(' ')))
    ui.status(log_entry['message'])
    ui.status("------------------------------------------------------------------------")
592
def display_parser_error(parser, message):
    """
    Report a command-line options error and terminate.

    Prints "error: <message>", a blank line and the parser's help text, then
    exits the process with status 1.
    """
    # Single-argument print(...) calls produce the same output whether print
    # is parsed as a statement or as a function.
    print("error: %s" % (message,))
    print("")
    parser.print_help()
    sys.exit(1)
601
def _find_first_log_entry(source_url, end_rev, chunk_size=1000):
    """
    Return the earliest "svn log" entry for source_url within r1:end_rev,
    or None if no entry is found.

    The history is scanned in windows of chunk_size revisions, writing some
    progress to the screen. A single "svn log --limit 1" over the full range
    ends-up being *VERY* time-consuming on a repo with lots of revisions; it
    seems like the limit-filter is happening _after_ svn has fetched the full
    log history.
    """
    rev = 1
    while rev <= end_rev:
        # The window must be capped by the *source* history's last revision.
        window_end = min(rev+chunk_size-1, end_rev)
        entries = svnclient.run_svn_log(source_url, rev, window_end, 1, get_changed_paths=False)
        if entries:
            return entries[0]
        ui.status("...%s...", rev)
        rev += chunk_size
    return None

def _svn_version_at_least(version, minimum):
    """
    Return True if the first two components (major, minor) of the SVN client
    version sequence are >= the given (major, minor) minimum.

    Components are compared as integers. Comparing "major.minor" as a float
    would rank 1.10 (-> 1.1) below 1.7.
    """
    return tuple(int(part) for part in version[0:2]) >= tuple(minimum)

def real_main(options, args):
    """
    Replay the history of source_url into target_url.

    options: parsed command-line options. This function reads
             cont_from_break, svn_rev and entries_proc_limit, and passes the
             whole object on to the commit/processing helpers.
    args:    list holding [source_url, target_url]; both are popped off.

    Works inside a "_wc_target" working-copy created under the current
    directory (and chdir's into it). Unless running in continue-mode, any
    existing "_wc_target" is deleted and checked-out again, and the contents
    of source_url at the start revision are imported as the first commit.

    Raises InternalError if no start revision can be found for source_url,
    and RuntimeError if continue-mode finds no already-replayed history.
    Errors (and Ctrl-C) during the replay loop itself are reported, the
    working-copy is cleaned-up and reverted, and the function returns normally.
    """
    source_url = args.pop(0).rstrip("/")
    target_url = args.pop(0).rstrip("/")
    ui.status("options: %s", str(options), level=ui.DEBUG, color='GREEN')

    # Make sure that both the source and target URL's are valid
    source_info = svnclient.get_svn_info(source_url)
    assert source_url.startswith(source_info['repos_url'])
    target_info = svnclient.get_svn_info(target_url)
    assert target_url.startswith(target_info['repos_url'])

    source_end_rev = source_info['revision']       # Get the last revision # for the source repo
    source_repos_url = source_info['repos_url']    # Get the base URL for the source repo, e.g. 'svn://svn.example.com/svn/repo'
    source_repos_uuid = source_info['repos_uuid']  # Get the UUID for the source repo

    wc_target = os.path.abspath('_wc_target')
    rev_map = {}
    num_entries_proc = 0
    commit_count = 0
    source_rev = None
    target_rev = None

    # Check out a working copy of target_url if needed
    wc_exists = os.path.exists(wc_target)
    if wc_exists and not options.cont_from_break:
        shutil.rmtree(wc_target)
        wc_exists = False
    if not wc_exists:
        svnclient.svn_checkout(target_url, wc_target)
    os.chdir(wc_target)

    if not options.cont_from_break:
        # TODO: Warn user if trying to start (non-continue) into a non-empty target path?
        # Get log entry for the SVN revision we will check out
        if options.svn_rev:
            # If specify a rev, get log entry just before or at rev
            source_start_log = svnclient.get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
        else:
            # Otherwise, get log entry of branch creation
            ui.status("Searching for start source revision (%s)...", source_url, level=ui.VERBOSE)
            source_start_log = _find_first_log_entry(source_url, source_end_rev)
        if not source_start_log:
            raise InternalError("Unable to find first revision for source_url: %s" % source_url)

        # This is the revision we will start from for source_url
        source_start_rev = source_rev = int(source_start_log['revision'])
        ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        paths = run_svn(["list", "-r", source_rev, source_url+"@"+str(source_rev)])
        if len(paths)>1:
            disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_url, source_rev, source_rev))
            ui.status("(Initial import)", level=ui.VERBOSE)
            paths = paths.strip("\n").split("\n")
            for path_raw in paths:
                # For each top-level file/folder...
                if not path_raw:
                    continue
                # Directories have a trailing slash in the "svn list" output
                path_is_dir = True if path_raw[-1] == "/" else False
                path = path_raw.rstrip('/') if path_is_dir else path_raw
                if path_is_dir and not os.path.exists(path):
                    os.makedirs(path)
                ui.status(" A %s", source_url[len(source_repos_url):]+"/"+path, level=ui.VERBOSE)
                run_svn(["export", "--force", "-r" , source_rev, source_url+"/"+path+"@"+str(source_rev), path])
                run_svn(["add", path])
            # The whole initial import counts as one processed log entry
            num_entries_proc += 1
            target_revprops = gen_tracking_revprops(source_repos_uuid, source_url, source_rev)   # Build source-tracking revprop's
            target_rev = commit_from_svn_log_entry(source_start_log, options, target_revprops=target_revprops)
            if target_rev:
                # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
                set_rev_map(rev_map, source_rev, target_rev)
                # Update our target working-copy, to ensure everything says it's at the new HEAD revision
                run_svn(["update"])
                commit_count += 1
    else:
        # Re-build the rev_map based on any already-replayed history in target_url
        rev_map = build_rev_map(target_url, source_info)
        if not rev_map:
            raise RuntimeError("Called with continue-mode, but no already-replayed history found in target repo: %s" % target_url)
        # Resume after the source rev which maps to the highest target rev
        source_start_rev = int(max(rev_map, key=rev_map.get))
        assert source_start_rev
        ui.status("Continuing from source revision %s.", source_start_rev, level=ui.VERBOSE)

    # SVN 1.7+ needs a periodic "svn cleanup" (see the replay loop below)
    needs_periodic_cleanup = _svn_version_at_least(svnclient.get_svn_client_version(), (1, 7))

    # Load SVN log starting from source_start_rev + 1
    it_log_entries = svnclient.iter_svn_log_entries(source_url, source_start_rev+1, source_end_rev, get_revprops=True)
    source_rev = None

    try:
        for log_entry in it_log_entries:
            if options.entries_proc_limit:
                if num_entries_proc >= options.entries_proc_limit:
                    break
            # Replay this revision from source_url into target_url
            disp_svn_log_summary(log_entry)
            source_rev = log_entry['revision']
            # Process all the changed-paths in this log entry
            commit_paths = []
            process_svn_log_entry(log_entry, source_repos_url, source_url, target_url,
                                  rev_map, options, commit_paths)
            num_entries_proc += 1
            # Commit any changes made to _wc_target
            target_revprops = gen_tracking_revprops(source_repos_uuid, source_url, source_rev)   # Build source-tracking revprop's
            target_rev = commit_from_svn_log_entry(log_entry, options, commit_paths, target_revprops=target_revprops)
            if target_rev:
                # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
                source_rev = log_entry['revision']
                set_rev_map(rev_map, source_rev, target_rev)
                # Update our target working-copy, to ensure everything says it's at the new HEAD revision
                run_svn(["update"])
                commit_count += 1
                # Run "svn cleanup" every 100 commits if SVN 1.7+, to clean-up orphaned ".svn/pristines/*"
                if needs_periodic_cleanup and (commit_count % 100 == 0):
                    run_svn(["cleanup"])
        if not source_rev:
            # If there were no new source_url revisions to process, init source_rev
            # for the "finally" message below.
            source_rev = source_end_rev

    except KeyboardInterrupt:
        print("\nStopped by user.")
        print("\nCleaning-up...")
        run_svn(["cleanup"])
        full_svn_revert()
    except:
        # Deliberate catch-all: report the failure, then leave the
        # working-copy in a clean state rather than propagating.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        print("\nCleaning-up...")
        run_svn(["cleanup"])
        print(run_svn(["status"]))
        full_svn_revert()
    finally:
        print("\nFinished at source revision %s." % source_rev)
756
def main():
    """
    Command-line entry point. Must be callable without arguments.

    Parses the command line, normalizes the verbosity and dry-run settings,
    pushes the options into the ui module and hands off to real_main().
    Exactly two positional arguments (source_url, target_url) are required;
    otherwise the parser error is displayed via display_parser_error().
    Returns whatever real_main() returns.
    """
    usage = """Usage: %prog [OPTIONS] source_url target_url

Replicate (replay) history from one SVN repository to another. Maintain
logical ancestry wherever possible, so that 'svn log' on the replayed
repo will correctly follow file/folder renames.

== Examples ==
Create a copy of only /trunk from source repo, starting at r5000
$ svnadmin create /svn/target
$ svn mkdir -m 'Add trunk' file:///svn/target/trunk
$ svn2svn -av -r 5000 http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target
2. The first commit to http://server/source/trunk at/after r5000 will be
exported & added into _wc_target
3. All revisions affecting http://server/source/trunk (starting at r5000)
will be replayed to _wc_target. Any add/copy/move/replaces that are
copy-from'd some path outside of /trunk (e.g. files renamed on a
/branch and branch was merged into /trunk) will correctly maintain
logical ancestry where possible.

Use continue-mode (-c) to pick-up where the last run left-off
$ svn2svn -avc http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target, if not already
checked-out
2. All new revisions affecting http://server/source/trunk starting from
the last replayed revision to file:///svn/target/trunk (based on the
svn2svn:* revprops) will be replayed to _wc_target, maintaining all
logical ancestry where possible."""
    cli = OptionParser(usage, version="%prog "+str(full_version))

    # (flags, settings) pairs, registered in this order so the generated
    # --help output lists them in the same sequence.
    option_specs = [
        (("-r", "--revision"),
         dict(type="int", dest="svn_rev", metavar="REV",
              help="initial SVN revision to start source_url replay")),
        (("-a", "--keep-author"),
         dict(action="store_true", dest="keep_author", default=False,
              help="maintain original 'Author' info from source repo")),
        (("-c", "--continue"),
         dict(action="store_true", dest="cont_from_break",
              help="continue from previous break")),
        (("-l", "--limit"),
         dict(type="int", dest="entries_proc_limit", metavar="NUM",
              help="maximum number of log entries to process")),
        (("-n", "--dry-run"),
         dict(action="store_true", dest="dry_run", default=False,
              help="try processing next log entry but don't commit changes to "
                   "target working-copy (forces --limit=1)")),
        (("-v", "--verbose"),
         dict(dest="verbosity", action="count", default=1,
              help="enable additional output (use -vv or -vvv for more)")),
        (("--debug",),
         dict(dest="verbosity", const=ui.DEBUG, action="store_const",
              help="enable debugging output (same as -vvv)")),
    ]
    for flags, settings in option_specs:
        cli.add_option(*flags, **settings)

    opts, positional = cli.parse_args()
    if len(positional) != 2:
        display_parser_error(cli, "incorrect number of arguments")

    # A small value is a count of "-v" flags: scale it up to a real ui level
    if opts.verbosity < 10:
        opts.verbosity = opts.verbosity * 10

    # Dry-run mode only ever tries the next log entry
    if opts.dry_run:
        opts.entries_proc_limit = 1

    ui.update_config(opts)
    return real_main(opts, positional)
817
818
if __name__ == "__main__":
    # Run as a script: a falsy result from main() (e.g. None) exits with 0
    exit_status = main() or 0
    sys.exit(exit_status)