]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/run/svn2svn.py
Improve optparse formatting
[svn2svn.git] / svn2svn / run / svn2svn.py
1 """
2 Replicate (replay) changesets from one SVN repository to another:
3 * Maintains full logical history (e.g. uses "svn copy" for renames).
4 * Maintains original commit messages.
5 * Optionally maintain source author info. (Only supported if accessing
6 target SVN repo via file://)
7 * Cannot maintain original commit date, but appends original commit date
8 for each commit message: "Date: %d".
9 * Optionally run an external shell script before each replayed commit
10 to give the ability to dynamically exclude or modify files as part
11 of the replay.
12
13 License: GPLv3, same as hgsvn (https://bitbucket.org/andialbrecht/hgsvn)
14 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
15 (Inspired by http://code.google.com/p/svn2svn/, and uses code for hgsvn
16 for SVN client handling)
17 """
18
19 from .. import base_version, full_version
20 from .. import ui
21 from .. import svnclient
22 from ..shell import run_svn
23 from ..errors import (ExternalCommandFailed, UnsupportedSVNAction, InternalError, VerificationError)
24 from parse import HelpFormatter
25
26 import sys
27 import os
28 import time
29 import traceback
30 import shutil
31 import operator
32 import optparse
33 from datetime import datetime
34
# Action codes from "svn log" that the replay code accepts (M, A, R, D).
# Stored as a plain string: callers test membership with "action in _valid_svn_actions"
# and raise UnsupportedSVNAction for anything else.
_valid_svn_actions = "MARD" # The list of known SVN action abbr's, from "svn log"
36
def commit_from_svn_log_entry(log_entry, options, commit_paths=None, target_revprops=None):
    """
    Run "svn commit" in the current working copy to replay one source log entry.

    'log_entry' supplies the 'date', 'message', 'author' and 'revprops' values.
    'options' is read for its 'keep_author' and 'dry_run' flags.
    'commit_paths' is an optional collection of paths to restrict the commit to.
    'target_revprops' is an optional list of {'name': ..., 'value': ...} entries to
    set on the new commit, on top of the revprops carried over from log_entry.

    Returns the revision scraped from the "svn commit" output, an empty string if
    the output had no "Committed revision" line, or None when nothing was committed
    (dry-run) or the command produced no output.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # In -vv (or higher) mode, show the pending _wc_target "svn status" first.
    if ui.get_level() >= ui.EXTRA:
        ui.status(">> commit_from_svn_log_entry: Pre-commit _wc_target status:", level=ui.EXTRA, color='CYAN')
        ui.status(run_svn(["status"]), level=ui.EXTRA, color='CYAN')
    # The commit time is rendered in the local timezone.
    # (For UTC commit times instead, use: svn_date = "%d 0" % int(log_entry['date']))
    svn_date = str(datetime.fromtimestamp(int(log_entry['date'])))
    args = ["commit", "--force-log", "-m"]
    if options.keep_author:
        # Commit as the original author; only the date is appended to the message.
        args.extend([log_entry['message'] + "\nDate: " + svn_date, "--username", log_entry['author']])
    else:
        # Otherwise record the original author in the message text.
        args.append(log_entry['message'] + "\nDate: " + svn_date + "\nAuthor: " + log_entry['author'])
    # Source revprops are applied first, then the extra target revprops, so the
    # latter win if both define the same name.
    merged_revprops = {}
    for group in (log_entry['revprops'], target_revprops):
        if group:
            for item in group:
                merged_revprops[item['name']] = item['value']
    for name in merged_revprops:
        args.extend(["--with-revprop", "%s=%s" % (name, str(merged_revprops[name]))])
    # Only pass explicit paths when there are fewer than 100 of them; otherwise
    # pass nothing so the commit runs from the root of the working-copy.
    if commit_paths and len(commit_paths) < 100:
        args.extend(commit_paths)
    if options.dry_run:
        return None
    # Run the "svn commit" command, and screen-scrape the target_rev value (if any)
    output = run_svn(args)
    if not output:
        return None
    marker = 'Committed revision '
    rev = ""
    for line in output.strip("\n").split("\n"):
        if line.startswith(marker):
            rev = line[len(marker):].rstrip('.')
            break
    if rev:
        ui.status("Committed revision %s.", rev)
    return rev
88
def full_svn_revert():
    """
    Do an "svn revert" and proactively remove any extra files in the working copy.

    Runs "svn revert --recursive ." in the current directory, then walks the
    "svn status" output and deletes every path reported as unversioned ("?"),
    whether it is a file or a directory tree.
    """
    run_svn(["revert", "--recursive", "."])
    output = run_svn(["status"])
    if not output:
        return
    for line in output.strip("\n").split("\n"):
        # Use startswith() rather than line[0]: a blank line in the status output
        # would otherwise raise IndexError.
        if not line.startswith("?"):
            continue
        path = line[4:].strip(" ")
        if os.path.isfile(path):
            os.remove(path)
        elif os.path.isdir(path):
            shutil.rmtree(path)
104
def gen_tracking_revprops(source_repos_uuid, source_url, source_rev):
    """
    Build the list of svn2svn-specific source-tracking revprops.

    Returns three {'name': ..., 'value': ...} dicts, in this order:
    'svn2svn:source_uuid', 'svn2svn:source_url', 'svn2svn:source_rev'.
    """
    tracked = (('svn2svn:source_uuid', source_repos_uuid),
               ('svn2svn:source_url', source_url),
               ('svn2svn:source_rev', source_rev))
    return [{'name': name, 'value': value} for name, value in tracked]
113
def in_svn(p, require_in_repo=False, prefix=""):
    """
    Report whether the file/folder 'p' is tracked by Subversion.

    Looking for ".svn" directories only worked before SVN 1.7; with WC-NG there is
    a single top-level ".svn" at the working-copy root, so the answer is taken
    from the (non-recursive) "svn status" of the path instead.

    'require_in_repo' makes locally-added paths (status 'added', or no revision)
    count as not tracked.
    'prefix' is prepended to the debug status message.

    Returns False if no status entry comes back for 'p'.
    """
    entries = svnclient.get_svn_status(p, no_recursive=True)
    if not entries:
        return False
    info = entries[0]
    if require_in_repo and (info['status'] == 'added' or info['revision'] is None):
        # Caller needs the path to exist in the repository, so a path that is
        # only locally-added does not qualify.
        tracked = False
    elif info['status'] == 'deleted':
        # Paths scheduled for deletion in the WC are not considered under source-control.
        tracked = False
    else:
        # Normal entries, plus locally added/copied ones, count as under source-control.
        tracked = (info['type'] == 'normal' or info['status'] == 'added' or info['copied'] == 'true')
    ui.status(prefix + ">> in_svn('%s', require_in_repo=%s) --> %s", p, str(require_in_repo), str(tracked), level=ui.DEBUG, color='GREEN')
    return tracked
135
def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix = ""):
    """
    Given a source path, walk the SVN history backwards to inspect the ancestry of
    that path, seeing if it traces back to base_path. Build an array of copyfrom_path
    and copyfrom_revision pairs for each of the "svn copies". If we find a copyfrom_path
    which base_path is a prefix of (e.g. we crawled back to the initial branch-
    copy from trunk), then return the collection of ancestor paths. Otherwise,
    copyfrom_path has no ancestry compared to base_path.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'svn_repos_url' is the full URL to the root of the SVN repository,
      e.g. 'file:///path/to/repo'
    'base_path' is the path in the SVN repo to the target path we're trying to
      trace ancestry back to, e.g. 'trunk'.
    'source_path' is the path in the SVN repo to the source path to start checking
      ancestry at, e.g. 'branches/fix1/projectA/file1.txt'.
      (full_path = svn_repos_url+base_path+"/"+path_offset)
    'source_rev' is the revision to start walking the history of source_path backwards from.
    'prefix' is prepended to every debug status message.

    Returns a list of {'path': ..., 'revision': ...} dicts. The first entry is
    base_path+"/"+source_path at source_rev; each following entry is the path and
    revision reached by following one more copy-from. Returns an empty list when
    no copy-from chain was recorded.

    Raises UnsupportedSVNAction if a changed-path action is not in _valid_svn_actions.
    """
    ui.status(prefix + ">> find_svn_ancestors: Start: (%s) source_path: %s base_path: %s",
        svn_repos_url, source_path+"@"+str(source_rev), base_path, level=ui.DEBUG, color='YELLOW')
    done = False
    working_path = base_path+"/"+source_path
    working_rev = source_rev
    first_iter_done = False
    # Copy-from hops collected so far; reset to [] whenever the chain turns out to be a dead end.
    ancestors_temp = []
    while not done:
        # Get the first "svn log" entry for this path (relative to @rev)
        ui.status(prefix + ">> find_svn_ancestors: %s", svn_repos_url + working_path+"@"+str(working_rev), level=ui.DEBUG, color='YELLOW')
        log_entry = svnclient.get_first_svn_log_entry(svn_repos_url + working_path+"@"+str(working_rev), 1, working_rev, True)
        if not log_entry:
            ui.status(prefix + ">> find_svn_ancestors: Done: no log_entry", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        # If we found a copy-from case which matches our base_path, we're done.
        # ...but only if we've at least tried to search for the first copy-from path.
        # (On the first pass working_path always starts with base_path, hence the flag.)
        if first_iter_done and working_path.startswith(base_path):
            ui.status(prefix + ">> find_svn_ancestors: Done: Found working_path.startswith(base_path) and first_iter_done=True", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        first_iter_done = True
        # Search for any actions on our target path (or parent paths).
        # NOTE(review): "path in working_path" is a substring test, not a prefix or
        # path-component test -- confirm that is intended.
        changed_paths_temp = []
        for d in log_entry['changed_paths']:
            path = d['path']
            if path in working_path:
                changed_paths_temp.append({'path': path, 'data': d})
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the chain and this path has no ancestry back to base_path.
            ui.status(prefix + ">> find_svn_ancestors: Done: No matching changed_paths", level=ui.DEBUG, color='YELLOW')
            done = True
            continue
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
        # Find the action for our working_path in this revision. Use a loop to check in reverse order,
        # so that if the target file/folder is "M" we still reach a parent folder with an "A" copy-from.
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            action = d['action']
            if action not in _valid_svn_actions:
                raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                    % (log_entry['revision'], action))
            ui.status(prefix + "> %s %s%s", action, path,
                (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                level=ui.DEBUG, color='YELLOW')
            if action == 'D':
                # If file/folder was deleted, it has no ancestor
                ancestors_temp = []
                ui.status(prefix + ">> find_svn_ancestors: Done: deleted", level=ui.DEBUG, color='YELLOW')
                done = True
                break
            if action in 'RA':
                # If file/folder was added/replaced but not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    ancestors_temp = []
                    ui.status(prefix + ">> find_svn_ancestors: Done: %s with no copyfrom_path",
                        "Added" if action == "A" else "Replaced",
                        level=ui.DEBUG, color='YELLOW')
                    done = True
                    break
                # Else, file/folder was added/replaced and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                ui.status(prefix + ">> find_svn_ancestors: Found copy-from (action=%s): %s --> %s",
                    action, path, d['copyfrom_path']+"@"+str(d['copyfrom_revision']),
                    level=ui.DEBUG, color='YELLOW')
                ancestors_temp.append({'path': path, 'revision': log_entry['revision'],
                    'copyfrom_path': d['copyfrom_path'], 'copyfrom_rev': d['copyfrom_revision']})
                # Rewrite the working path/rev to the copy source for the next pass of the while-loop.
                working_path = working_path.replace(d['path'], d['copyfrom_path'])
                working_rev = d['copyfrom_revision']
                # Follow the copy and keep on searching
                break
    # Convert the recorded hops into the returned list, re-applying each
    # path substitution in order starting from the original source path.
    ancestors = []
    if ancestors_temp:
        ancestors.append({'path': base_path+"/"+source_path, 'revision': source_rev})
        working_path = base_path+"/"+source_path
        for idx in range(len(ancestors_temp)):
            d = ancestors_temp[idx]
            working_path = working_path.replace(d['path'], d['copyfrom_path'])
            working_rev = d['copyfrom_rev']
            ancestors.append({'path': working_path, 'revision': working_rev})
        if ui.get_level() >= ui.DEBUG:
            # Debug-only: print the chain as aligned "child <-- parent" pairs.
            max_len = 0
            for idx in range(len(ancestors)):
                d = ancestors[idx]
                max_len = max(max_len, len(d['path']+"@"+str(d['revision'])))
            ui.status(prefix + ">> find_svn_ancestors: Found parent ancestors:", level=ui.DEBUG, color='YELLOW_B')
            for idx in range(len(ancestors)-1):
                d = ancestors[idx]
                d_next = ancestors[idx+1]
                ui.status(prefix + " [%s] %s <-- %s", idx,
                    str(d['path']+"@"+str(d['revision'])).ljust(max_len),
                    str(d_next['path']+"@"+str(d_next['revision'])).ljust(max_len),
                    level=ui.DEBUG, color='YELLOW')
    else:
        ui.status(prefix + ">> find_svn_ancestors: No ancestor-chain found: %s",
            svn_repos_url+base_path+"/"+source_path+"@"+str(source_rev), level=ui.DEBUG, color='YELLOW')
    return ancestors
257
def get_rev_map(rev_map, source_rev, prefix):
    """
    Find the equivalent rev # in the target repo for the given rev # from the source repo.

    Counts down from source_rev to 1 and returns (as an int) the target revision
    of the first source revision found in rev_map, i.e. the highest mapped entry
    less-than-or-equal-to source_rev. Returns None if no such entry exists.
    """
    ui.status(prefix + ">> get_rev_map(%s)", source_rev, level=ui.DEBUG, color='GREEN')
    candidate = int(source_rev)
    while candidate > 0:
        is_mapped = candidate in rev_map
        ui.status(prefix + ">> get_rev_map: rev=%s in_rev_map=%s", candidate, str(is_mapped), level=ui.DEBUG, color='BLACK_B')
        if is_mapped:
            return int(rev_map[candidate])
        candidate -= 1
    # Fell off the bottom of the rev_map without finding a match.
    return None
270
def set_rev_map(rev_map, source_rev, target_rev):
    """
    Record in rev_map that source_rev (source repo) was replayed as target_rev
    (target repo). Both revisions are stored as ints.
    """
    ui.status(">> set_rev_map: source_rev=%s target_rev=%s", source_rev, target_rev, level=ui.DEBUG, color='GREEN')
    source_key = int(source_rev)
    rev_map[source_key] = int(target_rev)
274
def build_rev_map(target_url, source_info):
    """
    Check for any already-replayed history from source_url (source_info) and
    build the mapping-table of source_rev -> target_rev.

    'target_url' is the URL whose log (revisions 1 through HEAD) is scanned.
    'source_info' must provide 'repos_uuid' and 'url'; a target revision is
    mapped only when its svn2svn:source_uuid and svn2svn:source_url revprops
    match those two values.

    Returns the rev_map dictionary (empty if nothing matched).
    """
    rev_map = {}
    ui.status("Rebuilding rev_map...", level=ui.VERBOSE)
    tracking_keys = ('svn2svn:source_uuid', 'svn2svn:source_url', 'svn2svn:source_rev')
    it_log_entries = svnclient.iter_svn_log_entries(target_url, 1, 'HEAD', get_changed_paths=False, get_revprops=True)
    for log_entry in it_log_entries:
        if not log_entry['revprops']:
            continue
        # Keep only our own tracking revprops from this target revision.
        revprops = {}
        for v in log_entry['revprops']:
            if v['name'].startswith('svn2svn:'):
                revprops[v['name']] = v['value']
        # A revision can carry revprops without the svn2svn tracking ones.
        # Skip it instead of raising KeyError on the lookups below.
        if [key for key in tracking_keys if key not in revprops]:
            continue
        if revprops['svn2svn:source_uuid'] == source_info['repos_uuid'] and \
           revprops['svn2svn:source_url'] == source_info['url']:
            source_rev = revprops['svn2svn:source_rev']
            target_rev = log_entry['revision']
            set_rev_map(rev_map, source_rev, target_rev)
    return rev_map
296
def get_svn_dirlist(svn_path, svn_rev = ""):
    """
    Get a list of the child contents of the given folder path, as printed by
    "svn list" (one entry per output line).

    'svn_path' is the path or URL to list.
    'svn_rev' optionally pins the listing to a revision: it is passed via "-r"
    and also appended to the path as a peg revision ("@rev").

    Returns an empty list when the command output is at most one character long.
    """
    target = svn_path
    cmd = ["list"]
    if svn_rev:
        cmd.extend(["-r", svn_rev])
        target = target + "@" + str(svn_rev)
    cmd.append(target)
    listing = run_svn(cmd, no_fail=True)
    if len(listing) > 1:
        return listing.strip("\n").split("\n")
    return []
310
def path_in_list(paths, path):
    """
    Return True if 'path' is already covered by an entry in 'paths', i.e. it is
    equal to an entry or lies underneath one.

    Matching is done on path-component boundaries, so "project/file" is NOT
    considered covered by "proj" (a bare startswith() would wrongly match it).
    An empty-string entry denotes the root and covers every path.
    """
    for p in paths:
        if not p or path == p:
            return True
        # Tolerate an entry written with a trailing slash ("dir/").
        if path.startswith(p.rstrip("/") + "/"):
            return True
    return False
316
def add_path(paths, path):
    """
    Append 'path' to the 'paths' list, unless path_in_list() reports that an
    existing entry already covers it.
    """
    already_covered = path_in_list(paths, path)
    if already_covered:
        return
    paths.append(path)
320
def do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev, \
               parent_copyfrom_path="", parent_copyfrom_rev="", export_paths=None, \
               rev_map=None, is_dir = False, prefix = ""):
    """
    Given the add'd source path, replay the "svn add/copy" commands to correctly
    track renames across copy-from's.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2 " Rename folder
    4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt " Rename file inside renamed folder
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'path_offset' is the offset from source_base to the file to check ancestry for,
      e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'target_url' is the full URL to the target path in the target repository.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'parent_copyfrom_path' and 'parent_copyfrom_rev' is the copy-from path of the parent
      directory, when being called recursively by do_svn_add_dir().
    'export_paths' is the list of path_offset's that we've deferred running "svn export" on.
      It is appended to in place. If omitted, a throw-away list is used, so the
      deferred paths are not visible to the caller.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s. If omitted, an empty map is used.
    'is_dir' is whether path_offset is a directory (rather than a file).
    'prefix' is prepended to debug status messages.
    """
    # Create fresh containers per call. The former "={}" defaults were shared
    # between calls, and the dict default for export_paths could not be
    # append()'d to by add_path().
    if export_paths is None:
        export_paths = []
    if rev_map is None:
        rev_map = {}
    source_base = source_url[len(source_repos_url):]
    ui.status(prefix + ">> do_svn_add: %s %s", source_base+"/"+path_offset+"@"+str(source_rev),
        " (parent-copyfrom: "+parent_copyfrom_path+"@"+str(parent_copyfrom_rev)+")" if parent_copyfrom_path else "",
        level=ui.DEBUG, color='GREEN')
    # Check if the given path has ancestors which chain back to the current source_base
    found_ancestor = False
    tgt_rev = None
    ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset, source_rev, prefix+" ")
    # ancestors[n] is the original (pre-branch-copy) trunk path.
    # ancestors[n-1] is the first commit on the new branch.
    copyfrom_path = ancestors[len(ancestors)-1]['path'] if ancestors else ""
    copyfrom_rev = ancestors[len(ancestors)-1]['revision'] if ancestors else ""
    if ancestors:
        # The copy-from path has ancestry back to source_url.
        ui.status(prefix + ">> do_svn_add: Check copy-from: Found parent: %s", copyfrom_path+"@"+str(copyfrom_rev),
            level=ui.DEBUG, color='GREEN', bold=True)
        found_ancestor = True
        # Map the copyfrom_rev (source repo) to the equivalent target repo rev #. This can
        # return None in the case where copyfrom_rev is *before* our source_start_rev.
        tgt_rev = get_rev_map(rev_map, copyfrom_rev, prefix+" ")
        ui.status(prefix + ">> do_svn_add: get_rev_map: %s (source) -> %s (target)", copyfrom_rev, tgt_rev, level=ui.DEBUG, color='GREEN')
    else:
        ui.status(prefix + ">> do_svn_add: Check copy-from: No ancestor chain found.", level=ui.DEBUG, color='GREEN')
        found_ancestor = False
    if found_ancestor and tgt_rev:
        # Check if this path_offset in the target WC already has this ancestry, in which
        # case there's no need to run the "svn copy" (again).
        path_in_svn = in_svn(path_offset, prefix=prefix+" ")
        log_entry = svnclient.get_last_svn_log_entry(path_offset, 1, 'HEAD', get_changed_paths=False) if in_svn(path_offset, require_in_repo=True, prefix=prefix+" ") else []
        if (not log_entry or (log_entry['revision'] != tgt_rev)):
            copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
            ui.status(prefix + ">> do_svn_add: svn_copy: Copy-from: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " copyfrom: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " p_copyfrom: %s", parent_copyfrom_path+"@"+str(parent_copyfrom_rev) if parent_copyfrom_path else "", level=ui.DEBUG, color='GREEN')
            if path_in_svn and \
               ((parent_copyfrom_path and copyfrom_path.startswith(parent_copyfrom_path)) and \
                (parent_copyfrom_rev and copyfrom_rev == parent_copyfrom_rev)):
                # When being called recursively, if this child entry has the same ancestor as
                # the parent, then no need to try to run another "svn copy".
                ui.status(prefix + ">> do_svn_add: svn_copy: Same ancestry as parent: %s",
                    parent_copyfrom_path+"@"+str(parent_copyfrom_rev),level=ui.DEBUG, color='GREEN')
            else:
                # Copy this path from the equivalent path+rev in the target repo, to create the
                # equivalent history.
                if parent_copyfrom_path:
                    # If we have a parent copy-from path, we mis-match that so display a status
                    # message describing the action we're mimic'ing. If path_in_svn, then this
                    # is logically a "replace" rather than an "add".
                    ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), source_base+"/"+path_offset, ancestors[1]['path']+"@"+str(copyfrom_rev), level=ui.VERBOSE)
                if path_in_svn:
                    # If local file is already under version-control, then this is a replace.
                    ui.status(prefix + ">> do_svn_add: pre-copy: local path already exists: %s", path_offset, level=ui.DEBUG, color='GREEN')
                    run_svn(["remove", "--force", path_offset])
                run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset])
                # Export the final version of this file/folder from the source repo, to make
                # sure we're up-to-date.
                add_path(export_paths, path_offset)
        else:
            ui.status(prefix + ">> do_svn_add: Skipped 'svn copy': %s", path_offset, level=ui.DEBUG, color='GREEN')
    else:
        # Else, either this copy-from path has no ancestry back to source_url OR copyfrom_rev comes
        # before our initial source_start_rev (i.e. tgt_rev == None), so can't do a "svn copy".
        # Create (parent) directory if needed.
        # TODO: This is (nearly) a duplicate of code in process_svn_log_entry(). Should this be
        # split-out to a shared tag?
        p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or '.'
        if not os.path.exists(p_path):
            run_svn(["mkdir", p_path])
        if not in_svn(path_offset, prefix=prefix+" "):
            if is_dir:
                # Export the final version of all files in this folder.
                add_path(export_paths, path_offset)
            else:
                # Export the final version of this file. We *need* to do this before running
                # the "svn add", even if we end-up re-exporting this file again via export_paths.
                run_svn(["export", "--force", "-r", source_rev,
                         source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])
            # If not already under version-control, then "svn add" this file/folder.
            run_svn(["add", "--parents", path_offset])
        # TODO: Need to copy SVN properties from source repos
    if is_dir:
        # For any folders that we process, process any child contents, so that we correctly
        # replay copies/replaces/etc.
        do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url,
                       copyfrom_path, copyfrom_rev, export_paths, rev_map, prefix+" ")
438
def do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url, \
                   parent_copyfrom_path, parent_copyfrom_rev, export_paths, rev_map, prefix=""):
    """
    Replay the contents of an added directory: run do_svn_add() for every child
    listed in the source folder at source_rev, then "svn remove" every child
    listed for the local path that is absent from the source listing.

    Parameters are those of do_svn_add(); 'parent_copyfrom_path' and
    'parent_copyfrom_rev' describe the copy-from of the directory 'path_offset'.
    """
    source_base = source_url[len(source_repos_url):]
    # Compare the directory contents of the local WC (target_url) against the remote repo (source_url).
    # TODO: the local listing won't include add'd paths because "svn ls" lists the contents of the
    #       associated remote repo folder. (Is this a problem?)
    local_entries = get_svn_dirlist(path_offset)
    remote_entries = get_svn_dirlist(source_url+"/"+path_offset, source_rev)
    ui.status(prefix + ">> do_svn_add_dir: paths_local: %s", str(local_entries), level=ui.DEBUG, color='GREEN')
    ui.status(prefix + ">> do_svn_add_dir: paths_remote: %s", str(remote_entries), level=ui.DEBUG, color='GREEN')
    # Add/update everything that exists in the remote listing.
    for entry in remote_entries:
        # Directories carry a trailing slash in the listing.
        entry_is_dir = (entry[-1] == "/")
        child_name = entry.rstrip('/') if entry_is_dir else entry
        do_svn_add(source_repos_url, source_url, path_offset+"/"+child_name, target_url, source_rev,
                   parent_copyfrom_path, parent_copyfrom_rev, export_paths,
                   rev_map, entry_is_dir, prefix+" ")
    # Remove everything that exists locally but not in the remote listing.
    for entry in local_entries:
        if entry in remote_entries:
            continue
        ui.status(" %s %s", 'D', source_base+"/"+path_offset+"/"+entry, level=ui.VERBOSE)
        run_svn(["remove", "--force", path_offset+"/"+entry])
        # TODO: Does this handle deleted folders too? Wouldn't want to have a case
        #       where we only delete all files from folder but leave orphaned folder around.
463
def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url, \
                          rev_map, options, commit_paths=None, prefix = ""):
    """
    Process SVN changes from the given log entry.
    Returns array of all the paths in the working-copy that were changed,
    i.e. the paths which need to be "svn commit".

    'log_entry' is the array structure built by parse_svn_log_xml().
    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s.
    'options' is accepted for interface compatibility; it is not read here.
    'commit_paths' is the working list of specific paths which changes to pass
      to the final "svn commit". It is extended in place and returned; if
      omitted, a new list is created for this call.
    'prefix' is prepended to debug status messages.

    Raises UnsupportedSVNAction for an action not in _valid_svn_actions.
    """
    # A "commit_paths=[]" default would be one shared list that this function
    # mutates and returns, leaking paths from one call into the next.
    if commit_paths is None:
        commit_paths = []
    export_paths = []
    # Get the relative offset of source_url based on source_repos_url
    # e.g. '/branches/bug123'
    source_base = source_url[len(source_repos_url):]
    source_rev = log_entry['revision']
    ui.status(prefix + ">> process_svn_log_entry: %s", source_url+"@"+str(source_rev), level=ui.DEBUG, color='GREEN')
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not path.startswith(source_base + "/"):
            # Ignore changed files that are not part of this subdir
            if path != source_base:
                ui.status(prefix + ">> process_svn_log_entry: Unrelated path: %s (base: %s)", path, source_base, level=ui.DEBUG, color='GREEN')
            continue
        assert len(d['kind'])>0
        path_is_dir = True if d['kind'] == 'dir' else False
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path
        action = d['action']
        if action not in _valid_svn_actions:
            raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                % (source_rev, action))
        ui.status(" %s %s%s", action, d['path'],
            (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
            level=ui.VERBOSE)

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        add_path(commit_paths, path_offset)

        # Special-handling for replace's
        if action == 'R':
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            # Handle cases where this "add" was a copy from another URL in the source repos
            if d['copyfrom_revision']:
                do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev,
                           "", "", export_paths, rev_map, path_is_dir, prefix+" ")
            # Else just "svn export" the files from the source repo and "svn add" them.
            else:
                # Create (parent) directory if needed
                p_path = path_offset if path_is_dir else os.path.dirname(path_offset).strip() or '.'
                if not os.path.exists(p_path):
                    run_svn(["mkdir", p_path])
                # Export the entire added tree.
                if path_is_dir:
                    # For directories, defer the (recursive) "svn export". Might have a
                    # situation in a branch merge where the entry in the svn-log is a
                    # non-copy-from'd "add" but there are child contents (that we haven't
                    # gotten to yet in log_entry) that are copy-from's. When we try do
                    # the "svn copy" later on in do_svn_add() for those copy-from'd paths,
                    # having pre-existing (svn-add'd) contents creates some trouble.
                    # Instead, just create the stub folders ("svn mkdir" above) and defer
                    # exporting the final file-state until the end.
                    add_path(export_paths, path_offset)
                else:
                    # Export the final version of this file. We *need* to do this before running
                    # the "svn add", even if we end-up re-exporting this file again via export_paths.
                    run_svn(["export", "--force", "-r", source_rev,
                             source_url+"/"+path_offset+"@"+str(source_rev), path_offset])
                if not in_svn(path_offset, prefix=prefix+" "):
                    # Need to use in_svn here to handle cases where client committed the parent
                    # folder and each indiv sub-folder.
                    run_svn(["add", "--parents", path_offset])
                # TODO: Need to copy SVN properties from source repos

        elif action == 'D':
            run_svn(["remove", "--force", path_offset])

        elif action == 'M':
            # TODO: Is "svn merge -c" correct here? Should this just be an "svn export" plus
            #       proplist updating?
            run_svn(["merge", "-c", source_rev, "--non-recursive",
                     "--non-interactive", "--accept=theirs-full",
                     source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

        else:
            raise InternalError("Internal Error: process_svn_log_entry: Unhandled 'action' value: '%s'"
                % action)

    # Export the final version of all add'd paths from source_url
    for path_offset in export_paths:
        run_svn(["export", "--force", "-r", source_rev,
                 source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

    return commit_paths
584
def disp_svn_log_summary(log_entry):
    """
    Print a summary of one log entry via ui.status(): a blank line, a
    "r<rev> | <author> | <date>" header (date from the entry's 'date' timestamp,
    rendered with datetime.fromtimestamp), the commit message, and a separator rule.
    """
    ui.status("")
    commit_time = datetime.fromtimestamp(int(log_entry['date']))
    header_fields = (log_entry['revision'], log_entry['author'], str(commit_time.isoformat(' ')))
    ui.status("r%s | %s | %s", *header_fields)
    ui.status(log_entry['message'])
    ui.status("------------------------------------------------------------------------")
593
594 def real_main(options, args):
595 source_url = args.pop(0).rstrip("/")
596 target_url = args.pop(0).rstrip("/")
597 ui.status("options: %s", str(options), level=ui.DEBUG, color='GREEN')
598
599 # Make sure that both the source and target URL's are valid
600 source_info = svnclient.get_svn_info(source_url)
601 assert source_url.startswith(source_info['repos_url'])
602 target_info = svnclient.get_svn_info(target_url)
603 assert target_url.startswith(target_info['repos_url'])
604
605 source_end_rev = source_info['revision'] # Get the last revision # for the source repo
606 source_repos_url = source_info['repos_url'] # Get the base URL for the source repo, e.g. 'svn://svn.example.com/svn/repo'
607 source_repos_uuid = source_info['repos_uuid'] # Get the UUID for the source repo
608
609 wc_target = os.path.abspath('_wc_target')
610 rev_map = {}
611 num_entries_proc = 0
612 commit_count = 0
613 source_rev = None
614 target_rev = None
615
616 # Check out a working copy of target_url if needed
617 wc_exists = os.path.exists(wc_target)
618 if wc_exists and not options.cont_from_break:
619 shutil.rmtree(wc_target)
620 wc_exists = False
621 if not wc_exists:
622 svnclient.svn_checkout(target_url, wc_target)
623 os.chdir(wc_target)
624
625 if not options.cont_from_break:
626 # TODO: Warn user if trying to start (non-continue) into a non-empty target path?
627 # Get log entry for the SVN revision we will check out
628 if options.svn_rev:
629 # If specify a rev, get log entry just before or at rev
630 source_start_log = svnclient.get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
631 else:
632 # Otherwise, get log entry of branch creation
633 # Note: Trying to use svnclient.get_first_svn_log_entry(source_url, 1, source_end_rev, False)
634 # ends-up being *VERY* time-consuming on a repo with lots of revisions. Even though
635 # the "svn log" call is passing --limit 1, it seems like that limit-filter is happening
636 # _after_ svn has fetched the full log history. Instead, search the history in chunks
637 # and write some progress to the screen.
638 ui.status("Searching for start source revision (%s)...", source_url, level=ui.VERBOSE)
639 rev = 1
640 chunk_size = 1000
641 done = False
642 while not done:
643 entries = svnclient.run_svn_log(source_url, rev, min(rev+chunk_size-1, target_info['revision']), 1, get_changed_paths=False)
644 if entries:
645 source_start_log = entries[0]
646 done = True
647 break
648 ui.status("...%s...", rev)
649 rev = rev+chunk_size
650 if rev > target_info['revision']:
651 done = True
652 if not source_start_log:
653 raise InternalError("Unable to find first revision for source_url: %s" % source_url)
654
655 # This is the revision we will start from for source_url
656 source_start_rev = source_rev = int(source_start_log['revision'])
657 ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE)
658
659 # For the initial commit to the target URL, export all the contents from
660 # the source URL at the start-revision.
661 paths = run_svn(["list", "-r", source_rev, source_url+"@"+str(source_rev)])
662 if len(paths)>1:
663 disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_url, source_rev, source_rev))
664 ui.status("(Initial import)", level=ui.VERBOSE)
665 paths = paths.strip("\n").split("\n")
666 for path_raw in paths:
667 # For each top-level file/folder...
668 if not path_raw:
669 continue
670 # Directories have a trailing slash in the "svn list" output
671 path_is_dir = True if path_raw[-1] == "/" else False
672 path = path_raw.rstrip('/') if path_is_dir else path_raw
673 if path_is_dir and not os.path.exists(path):
674 os.makedirs(path)
675 ui.status(" A %s", source_url[len(source_repos_url):]+"/"+path, level=ui.VERBOSE)
676 run_svn(["export", "--force", "-r" , source_rev, source_url+"/"+path+"@"+str(source_rev), path])
677 run_svn(["add", path])
678 num_entries_proc += 1
679 target_revprops = gen_tracking_revprops(source_repos_uuid, source_url, source_rev) # Build source-tracking revprop's
680 target_rev = commit_from_svn_log_entry(source_start_log, options, target_revprops=target_revprops)
681 if target_rev:
682 # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
683 set_rev_map(rev_map, source_rev, target_rev)
684 # Update our target working-copy, to ensure everything says it's at the new HEAD revision
685 run_svn(["update"])
686 commit_count += 1
687 else:
688 # Re-build the rev_map based on any already-replayed history in target_url
689 rev_map = build_rev_map(target_url, source_info)
690 if not rev_map:
691 raise RuntimeError("Called with continue-mode, but no already-replayed history found in target repo: %s" % target_url)
692 source_start_rev = int(max(rev_map, key=rev_map.get))
693 assert source_start_rev
694 ui.status("Continuing from source revision %s.", source_start_rev, level=ui.VERBOSE)
695
696 svn_vers_t = svnclient.get_svn_client_version()
697 svn_vers = float(".".join(map(str, svn_vers_t[0:2])))
698
699 # Load SVN log starting from source_start_rev + 1
700 it_log_entries = svnclient.iter_svn_log_entries(source_url, source_start_rev+1, source_end_rev, get_revprops=True)
701 source_rev = None
702
703 try:
704 for log_entry in it_log_entries:
705 if options.entries_proc_limit:
706 if num_entries_proc >= options.entries_proc_limit:
707 break
708 # Replay this revision from source_url into target_url
709 disp_svn_log_summary(log_entry)
710 source_rev = log_entry['revision']
711 # Process all the changed-paths in this log entry
712 commit_paths = []
713 process_svn_log_entry(log_entry, source_repos_url, source_url, target_url,
714 rev_map, options, commit_paths)
715 num_entries_proc += 1
716 # Commit any changes made to _wc_target
717 target_revprops = gen_tracking_revprops(source_repos_uuid, source_url, source_rev) # Build source-tracking revprop's
718 target_rev = commit_from_svn_log_entry(log_entry, options, commit_paths, target_revprops=target_revprops)
719 if target_rev:
720 # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
721 source_rev = log_entry['revision']
722 set_rev_map(rev_map, source_rev, target_rev)
723 # Update our target working-copy, to ensure everything says it's at the new HEAD revision
724 run_svn(["update"])
725 commit_count += 1
726 # Run "svn cleanup" every 100 commits if SVN 1.7+, to clean-up orphaned ".svn/pristines/*"
727 if svn_vers >= 1.7 and (commit_count % 100 == 0):
728 run_svn(["cleanup"])
729 if not source_rev:
730 # If there were no new source_url revisions to process, init source_rev
731 # for the "finally" message below.
732 source_rev = source_end_rev
733
734 except KeyboardInterrupt:
735 print "\nStopped by user."
736 print "\nCleaning-up..."
737 run_svn(["cleanup"])
738 full_svn_revert()
739 except:
740 print "\nCommand failed with following error:\n"
741 traceback.print_exc()
742 print "\nCleaning-up..."
743 run_svn(["cleanup"])
744 print run_svn(["status"])
745 full_svn_revert()
746 finally:
747 print "\nFinished at source revision %s." % source_rev
748
def main():
    """
    Command-line entry point: parse the options and hand off to real_main().

    Must remain callable without arguments. Exactly two positional
    arguments (source_url, target_url) are expected on the command line;
    any other count is reported through the parser's error() method.
    Returns whatever real_main() returns.
    """
    usage_text = "Usage: %prog [OPTIONS] source_url target_url"
    # NOTE(review): the copy of this file under review shows no leading
    # whitespace on the help-text lines below. If the repository copy indents
    # the example commands/steps, keep that indentation -- confirm before
    # applying this block.
    help_text = """\
Replicate (replay) history from one SVN repository to another. Maintain
logical ancestry wherever possible, so that 'svn log' on the replayed
repo will correctly follow file/folder renames.

== Examples ==
Create a copy of only /trunk from source repo, starting at r5000
$ svnadmin create /svn/target
$ svn mkdir -m 'Add trunk' file:///svn/target/trunk
$ svn2svn -av -r 5000 http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target
2. The first commit to http://server/source/trunk at/after r5000 will be
exported & added into _wc_target
3. All revisions affecting http://server/source/trunk (starting at r5000)
will be replayed to _wc_target. Any add/copy/move/replaces that are
copy-from'd some path outside of /trunk (e.g. files renamed on a
/branch and branch was merged into /trunk) will correctly maintain
logical ancestry where possible.

Use continue-mode (-c) to pick-up where the last run left-off
$ svn2svn -avc http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target, if not already
checked-out
2. All new revisions affecting http://server/source/trunk starting from
the last replayed revision to file:///svn/target/trunk (based on the
svn2svn:* revprops) will be replayed to _wc_target, maintaining all
logical ancestry where possible."""
    cli = optparse.OptionParser(
        usage_text,
        description=help_text,
        formatter=HelpFormatter(),
        version="%prog " + str(full_version))
    # optparse's built-in -h/--help option is left in place (no custom
    # replacement is registered).
    add_opt = cli.add_option
    add_opt("-r", "--revision", dest="svn_rev", type="int", metavar="REV",
            help="initial SVN revision to start source_url replay")
    add_opt("-a", "--keep-author", dest="keep_author", action="store_true",
            default=False,
            help="maintain original 'Author' info from source repo")
    add_opt("-c", "--continue", dest="cont_from_break", action="store_true",
            help="continue from previous break")
    add_opt("-l", "--limit", dest="entries_proc_limit", type="int",
            metavar="NUM",
            help="maximum number of log entries to process")
    add_opt("-n", "--dry-run", dest="dry_run", action="store_true",
            default=False,
            help="try processing next log entry but don't commit "
                 "changes to target working-copy (forces --limit=1)")
    # -v and --debug both write to the same "verbosity" destination.
    add_opt("-v", "--verbose", dest="verbosity", action="count", default=1,
            help="enable additional output (use -vv or -vvv for more)")
    add_opt("--debug", dest="verbosity", action="store_const", const=ui.DEBUG,
            help="enable debugging output (same as -vvv)")

    opts, positional = cli.parse_args()
    if len(positional) != 2:
        cli.error("incorrect number of arguments")
    if opts.dry_run:
        # A dry run only attempts the next log entry.
        opts.entries_proc_limit = 1
    if opts.verbosity < 10:
        # Small values come from counting "-v" flags (or the default of 1);
        # scale them by 10 to obtain a ui level. Values already at 10 or
        # above are left untouched (presumably the case for the ui.DEBUG
        # constant stored by --debug -- confirm against the ui module).
        opts.verbosity = opts.verbosity * 10
    ui.update_config(opts)
    return real_main(opts, positional)
810
811
if __name__ == "__main__":
    # Run the CLI when executed as a script; a falsy return value from
    # main() (such as None) is reported to the shell as exit status 0.
    exit_status = main() or 0
    sys.exit(exit_status)