]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/run/svn2svn.py
Additive verbosity command-line args
[svn2svn.git] / svn2svn / run / svn2svn.py
1 """
2 Replicate (replay) changesets from one SVN repository to another:
3 * Maintains full logical history (e.g. uses "svn copy" for renames).
4 * Maintains original commit messages.
5 * Optionally maintain source author info. (Only supported if accessing
6 target SVN repo via file://)
7 * Cannot maintain original commit date, but appends original commit date
8 for each commit message: "Date: %d".
9 * Optionally run an external shell script before each replayed commit
10 to give the ability to dynamically exclude or modify files as part
11 of the replay.
12
13 License: GPLv3, same as hgsvn (https://bitbucket.org/andialbrecht/hgsvn)
14 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
15 (Inspired by http://code.google.com/p/svn2svn/, and uses code for hgsvn
16 for SVN client handling)
17 """
18
19 from .. import base_version, full_version
20 from .. import ui
21 from .. import svnclient
22 from ..shell import run_svn
23 from ..errors import (ExternalCommandFailed, UnsupportedSVNAction)
24
25 import sys
26 import os
27 import time
28 import traceback
29 import shutil
30 import operator
31 from optparse import OptionParser,OptionGroup
32 from datetime import datetime
33
def commit_from_svn_log_entry(log_entry, files=None, keep_author=False, target_revprops=None):
    """
    Replay one source log entry as an "svn commit" in the current working copy.

    'log_entry' is a dict providing at least 'date' (epoch seconds), 'message'
    and 'author', and optionally 'revprops' (a list of {'name', 'value'} dicts).
    'files' is an optional sequence of paths to limit the commit to; when empty
    the commit is run without explicit targets.
    'keep_author' passes the source author via "--username" instead of
    appending an "Author:" line to the commit message.
    'target_revprops' is an optional list of extra {'name', 'value'} revprops
    to set on the new commit; these override same-named source revprops.

    Returns the committed revision as a string, "" if the svn output did not
    contain a "Committed revision" line, or None if svn printed nothing.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # This will use the local timezone for displaying commit times
    timestamp = int(log_entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    # For UTC commit times, use this instead:  svn_date = "%d 0" % timestamp
    message = log_entry['message'] + "\nDate: " + svn_date
    if keep_author:
        options = ["commit", "--force-log", "-m", message, "--username", log_entry['author']]
    else:
        options = ["commit", "--force-log", "-m", message + "\nAuthor: " + log_entry['author']]
    revprops = {}
    # Carry forward any revprop's from the source revision. Use .get() so that
    # log entries fetched without revprops don't raise a KeyError.
    for v in (log_entry.get('revprops') or []):
        revprops[v['name']] = v['value']
    # Add any extra revprop's we want to set for the target repo commits
    for v in (target_revprops or []):
        revprops[v['name']] = v['value']
    for key in revprops:
        options += ["--with-revprop", "%s=%s" % (key, str(revprops[key]))]
    if files:
        options += list(files)
    if ui.get_level() >= ui.EXTRA:
        ui.status(">> commit_from_svn_log_entry: Pre-commit _wc_target status:", level=ui.EXTRA, color='CYAN')
        ui.status(run_svn(["status"]), level=ui.EXTRA, color='CYAN')
    output = run_svn(options)
    rev = None
    if output:
        marker = 'Committed revision '
        rev = ""
        for line in output.strip("\n").split("\n"):
            if line.startswith(marker):
                rev = line[len(marker):].rstrip('.')
                break
        if rev:
            ui.status("Committed revision %s.", rev)
    return rev
77
def full_svn_revert():
    """
    Do an "svn revert" and proactively remove any extra files in the working copy.

    Runs a recursive revert on the current directory, then deletes every path
    that "svn status" still reports as unversioned ("?").
    """
    run_svn(["revert", "--recursive", "."])
    output = run_svn(["status"])
    if not output:
        return
    for line in output.strip("\n").split("\n"):
        # startswith() instead of line[0]: "svn status" can emit blank separator
        # lines, and indexing an empty string would raise IndexError.
        if not line.startswith("?"):
            continue
        path = line[4:].strip(" ")
        if os.path.isfile(path):
            os.remove(path)
        elif os.path.isdir(path):
            shutil.rmtree(path)
93
def gen_tracking_revprops(source_repos_uuid, source_url, source_rev):
    """
    Return the list of svn2svn source-tracking revprops for one replayed commit.

    Each element is a {'name': ..., 'value': ...} dict, in the order
    source_uuid, source_url, source_rev.
    """
    tracking = (
        ('svn2svn:source_uuid', source_repos_uuid),
        ('svn2svn:source_url', source_url),
        ('svn2svn:source_rev', source_rev),
    )
    return [{'name': name, 'value': value} for name, value in tracking]
102
def in_svn(p, require_in_repo=False, prefix=""):
    """
    Report whether the file/folder 'p' is tracked by Subversion.

    The answer comes from a non-recursive "svn status" on 'p' rather than from
    looking for ".svn" directories, since WC-NG working copies (SVN 1.7+) only
    keep a single ".svn" at the working-copy root.

    With 'require_in_repo' set, paths that are only locally added (status
    'added' or no revision) are reported as not tracked. 'prefix' is prepended
    to the debug status message.
    """
    status_entries = svnclient.get_svn_status(p, no_recursive=True)
    if not status_entries:
        return False
    entry = status_entries[0]
    if require_in_repo and (entry['status'] == 'added' or entry['revision'] is None):
        # Caller needs the path to exist in the repository itself, so a
        # locally-added path doesn't count.
        tracked = False
    elif entry['status'] == 'deleted':
        # Paths scheduled for deletion are not considered under source-control.
        tracked = False
    else:
        # Normal paths, plus locally added/copied ones, count as tracked.
        tracked = (entry['type'] == 'normal'
                   or entry['status'] == 'added'
                   or entry['copied'] == 'true')
    ui.status(prefix + ">> in_svn('%s', require_in_repo=%s) --> %s", p, str(require_in_repo), str(tracked), level=ui.DEBUG, color='GREEN')
    return tracked
124
def _is_path_or_ancestor(candidate, path):
    """Return True if 'candidate' equals 'path' or is one of its parent folders."""
    if candidate == path:
        return True
    return path.startswith(candidate.rstrip("/") + "/")

def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix = ""):
    """
    Given a source path, walk the SVN history backwards to inspect the ancestry of
    that path, seeing if it traces back to base_path. Build an array of copyfrom_path
    and copyfrom_revision pairs for each of the "svn copies". If we find a copyfrom_path
    which starts with base_path (e.g. we crawled back to the initial branch-copy
    from trunk), then return the collection of ancestor paths. Otherwise, the
    path has no ancestry compared to base_path.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'svn_repos_url' is the full URL to the root of the SVN repository,
      e.g. 'file:///path/to/repo'
    'base_path' is the path in the SVN repo to the target path we're trying to
      trace ancestry back to, e.g. 'trunk'.
    'source_path' is the path in the SVN repo to the source path to start checking
      ancestry at, e.g. 'branches/fix1/projectA/file1.txt'.
      (full_path = svn_repos_url+base_path+"/"+path_offset)
    'source_rev' is the revision to start walking the history of source_path backwards from.
    'prefix' is prepended to every debug status message.

    Returns a list of {'path', 'revision'} dicts, starting with the source path
    itself and followed by one entry per copy-from hop, or an empty list when no
    ancestor chain was found.

    Raises UnsupportedSVNAction for a changed-path action other than M/A/R/D.
    """
    ui.status(prefix + ">> find_svn_ancestors: Start: (%s) source_path: %s base_path: %s",
        svn_repos_url, source_path+"@"+str(source_rev), base_path, level=ui.DEBUG, color='YELLOW')
    done = False
    working_path = base_path+"/"+source_path
    working_rev = source_rev
    first_iter_done = False
    ancestors_temp = []
    while not done:
        # Get the first "svn log" entry for this path (relative to @rev)
        ui.status(prefix + ">> find_svn_ancestors: %s", svn_repos_url + working_path+"@"+str(working_rev), level=ui.DEBUG, color='YELLOW')
        log_entry = svnclient.get_first_svn_log_entry(svn_repos_url + working_path+"@"+str(working_rev), 1, working_rev, True)
        if not log_entry:
            ui.status(prefix + ">> find_svn_ancestors: Done: no log_entry", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        # If we found a copy-from case which matches our base_path, we're done.
        # ...but only if we've at least tried to search for the first copy-from path.
        if first_iter_done and working_path.startswith(base_path):
            ui.status(prefix + ">> find_svn_ancestors: Done: Found working_path.startswith(base_path) and first_iter_done=True", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        first_iter_done = True
        # Search for any actions on our target path (or parent paths). This must be
        # a path-component match: a plain substring test would let "/trunk/foo"
        # match "/trunk/foobar/file.txt".
        changed_paths_temp = []
        for d in log_entry['changed_paths']:
            path = d['path']
            if _is_path_or_ancestor(path, working_path):
                changed_paths_temp.append({'path': path, 'data': d})
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the chain and this path has no ancestry back to base_path.
            ui.status(prefix + ">> find_svn_ancestors: Done: No matching changed_paths", level=ui.DEBUG, color='YELLOW')
            done = True
            continue
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
        # Find the action for our working_path in this revision. Use a loop to check in reverse order,
        # so that if the target file/folder is "M" we still find a parent folder with an "A" copy-from.
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            action = d['action']
            if action not in ('M', 'A', 'R', 'D'):
                raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                    % (log_entry['revision'], action))
            ui.status(prefix + "> %s %s%s", action, path,
                (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                level=ui.DEBUG, color='YELLOW')
            if action == 'D':
                # If file/folder was deleted, it has no ancestor
                ancestors_temp = []
                ui.status(prefix + ">> find_svn_ancestors: Done: deleted", level=ui.DEBUG, color='YELLOW')
                done = True
                break
            if action in ('R', 'A'):
                # If file/folder was added/replaced but not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    ancestors_temp = []
                    ui.status(prefix + ">> find_svn_ancestors: Done: %s with no copyfrom_path",
                        "Added" if action == "A" else "Replaced",
                        level=ui.DEBUG, color='YELLOW')
                    done = True
                    break
                # Else, file/folder was added/replaced and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                ui.status(prefix + ">> find_svn_ancestors: Found copy-from (action=%s): %s --> %s",
                    action, path, d['copyfrom_path']+"@"+str(d['copyfrom_revision']),
                    level=ui.DEBUG, color='YELLOW')
                ancestors_temp.append({'path': path, 'revision': log_entry['revision'],
                    'copyfrom_path': d['copyfrom_path'], 'copyfrom_rev': d['copyfrom_revision']})
                # 'path' is a leading prefix of working_path (checked above), so only
                # swap that first occurrence; later repeats of the same text stay intact.
                working_path = working_path.replace(d['path'], d['copyfrom_path'], 1)
                working_rev = d['copyfrom_revision']
                # Follow the copy and keep on searching
                break
    ancestors = []
    if ancestors_temp:
        # Replay the recorded copy-from hops, starting from the original source path.
        ancestors.append({'path': base_path+"/"+source_path, 'revision': source_rev})
        working_path = base_path+"/"+source_path
        for d in ancestors_temp:
            working_path = working_path.replace(d['path'], d['copyfrom_path'], 1)
            working_rev = d['copyfrom_rev']
            ancestors.append({'path': working_path, 'revision': working_rev})
        if ui.get_level() >= ui.DEBUG:
            labels = [str(d['path']+"@"+str(d['revision'])) for d in ancestors]
            max_len = max(len(label) for label in labels)
            ui.status(prefix + ">> find_svn_ancestors: Found parent ancestors:", level=ui.DEBUG, color='YELLOW_B')
            for idx in range(len(labels)-1):
                ui.status(prefix + " [%s] %s <-- %s", idx,
                    labels[idx].ljust(max_len),
                    labels[idx+1].ljust(max_len),
                    level=ui.DEBUG, color='YELLOW')
    else:
        ui.status(prefix + ">> find_svn_ancestors: No ancestor-chain found: %s",
            svn_repos_url+base_path+"/"+source_path+"@"+str(source_rev), level=ui.DEBUG, color='YELLOW')
    return ancestors
246
def get_rev_map(rev_map, source_rev, prefix):
    """
    Translate a source-repo revision number into the matching target-repo one.

    Looks for the highest key in 'rev_map' that is less than or equal to
    'source_rev' and returns its mapped value as an int, or None when no such
    key exists. 'prefix' is prepended to the debug status messages.
    """
    ui.status(prefix + ">> get_rev_map(%s)", source_rev, level=ui.DEBUG, color='GREEN')
    candidate = int(source_rev)
    # Walk downwards from source_rev until we hit a mapped revision.
    while candidate > 0:
        known = candidate in rev_map
        ui.status(prefix + ">> get_rev_map: rev=%s in_rev_map=%s", candidate, str(known), level=ui.DEBUG, color='BLACK_B')
        if known:
            return int(rev_map[candidate])
        candidate -= 1
    # Fell off the bottom of the rev_map without finding anything.
    return None
259
def set_rev_map(rev_map, source_rev, target_rev):
    """
    Record in 'rev_map' that 'source_rev' was replayed as 'target_rev'.

    Both revisions are stored as ints; 'rev_map' is updated in place.
    """
    ui.status(">> set_rev_map: source_rev=%s target_rev=%s", source_rev, target_rev, level=ui.DEBUG, color='GREEN')
    source_key = int(source_rev)
    rev_map[source_key] = int(target_rev)
263
def build_rev_map(target_url, source_info):
    """
    Check for any already-replayed history from source_url (source_info) and
    build the mapping-table of source_rev -> target_rev.

    'target_url' is the URL whose log (revisions 1..HEAD) is scanned.
    'source_info' is a dict providing the source 'repos_uuid' and 'url'; only
    target revisions whose svn2svn tracking revprops match both are mapped.

    Returns a dict of {source_rev (int): target_rev (int)}.
    """
    required = ('svn2svn:source_uuid', 'svn2svn:source_url', 'svn2svn:source_rev')
    rev_map = {}
    ui.status("Rebuilding rev_map...", level=ui.VERBOSE)
    it_log_entries = svnclient.iter_svn_log_entries(target_url, 1, 'HEAD', get_changed_paths=False, get_revprops=True)
    for log_entry in it_log_entries:
        if not log_entry['revprops']:
            continue
        tracking = dict((v['name'], v['value']) for v in log_entry['revprops']
                        if v['name'].startswith('svn2svn:'))
        # Target revisions may carry revprops without (all of) the svn2svn
        # tracking ones, e.g. commits not made by this tool. Skip those rather
        # than failing with a KeyError.
        if not all(name in tracking for name in required):
            continue
        if tracking['svn2svn:source_uuid'] == source_info['repos_uuid'] and \
           tracking['svn2svn:source_url'] == source_info['url']:
            source_rev = tracking['svn2svn:source_rev']
            target_rev = log_entry['revision']
            set_rev_map(rev_map, source_rev, target_rev)
    return rev_map
285
def get_svn_dirlist(svn_path, svn_rev = ""):
    """
    Return the "svn list" output for the given folder path as a list of lines.

    When 'svn_rev' is given, the listing is taken at that revision (passed as
    both "-r" and a "@rev" peg). Returns an empty list when svn printed at
    most a single character.
    """
    target = svn_path
    cmd = ["list"]
    if svn_rev:
        cmd.extend(["-r", svn_rev])
        target = target + "@" + str(svn_rev)
    cmd.append(target)
    listing = run_svn(cmd, no_fail=True)
    if len(listing) > 1:
        return listing.strip("\n").split("\n")
    return []
299
def add_path(paths, path_offset):
    """
    Helper function to add a path to a list but only if that path, or one of
    its parent folders, isn't already in the list. Assumes that paths are added
    roughly in breadth-first order.

    The comparison is done on whole path components, so an existing 'foo' entry
    covers 'foo' and 'foo/bar' but not the sibling 'foobar'. An empty-string
    entry is treated as the root and covers everything.

    'paths' is modified in place and also returned.
    """
    for p in paths:
        if not p or path_offset == p or path_offset.startswith(p.rstrip("/") + "/"):
            # Already covered by an existing entry.
            return paths
    paths.append(path_offset)
    return paths
314
def do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev,
               parent_copyfrom_path="", parent_copyfrom_rev="", export_paths=None,
               rev_map=None, is_dir = False, prefix = ""):
    """
    Given the add'd source path, replay the "svn add/copy" commands to correctly
    track renames across copy-from's.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2   (rename folder)
    4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt
       (rename file inside renamed folder)
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'path_offset' is the offset from source_base to the file to check ancestry for,
      e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'target_url' is the full URL to the target path in the target repository.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'parent_copyfrom_path' and 'parent_copyfrom_rev' is the copy-from path of the parent
      directory, when being called recursively by do_svn_add_dir().
    'export_paths' is the list of path_offset's that we've deferred running "svn export" on.
      It is appended to in place; a fresh list is used when omitted.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s. An empty map is used when omitted.
    'is_dir' is whether path_offset is a directory (rather than a file).
    'prefix' is prepended to the debug status messages.
    """
    # Avoid shared mutable defaults. (The old export_paths default was also a
    # dict, which add_path() cannot append to.)
    if export_paths is None:
        export_paths = []
    if rev_map is None:
        rev_map = {}
    source_base = source_url[len(source_repos_url):]
    ui.status(prefix + ">> do_svn_add: %s %s", source_base+"/"+path_offset+"@"+str(source_rev),
        " (parent-copyfrom: "+parent_copyfrom_path+"@"+str(parent_copyfrom_rev)+")" if parent_copyfrom_path else "",
        level=ui.DEBUG, color='GREEN')
    # Check if the given path has ancestors which chain back to the current source_base
    ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset, source_rev, prefix+" ")
    # ancestors[n] is the original (pre-branch-copy) trunk path.
    # ancestors[n-1] is the first commit on the new branch.
    copyfrom_path = ancestors[-1]['path'] if ancestors else ""
    copyfrom_rev = ancestors[-1]['revision'] if ancestors else ""
    tgt_rev = None
    if ancestors:
        # The copy-from path has ancestry back to source_url.
        ui.status(prefix + ">> do_svn_add: Check copy-from: Found parent: %s", copyfrom_path+"@"+str(copyfrom_rev),
            level=ui.DEBUG, color='GREEN', bold=True)
        # Map the copyfrom_rev (source repo) to the equivalent target repo rev #. This can
        # return None in the case where copyfrom_rev is *before* our source_start_rev.
        tgt_rev = get_rev_map(rev_map, copyfrom_rev, prefix+" ")
        ui.status(prefix + ">> do_svn_add: get_rev_map: %s (source) -> %s (target)", copyfrom_rev, tgt_rev, level=ui.DEBUG, color='GREEN')
    else:
        ui.status(prefix + ">> do_svn_add: Check copy-from: No ancestor chain found.", level=ui.DEBUG, color='GREEN')
    if ancestors and tgt_rev:
        # Check if this path_offset in the target WC already has this ancestry, in which
        # case there's no need to run the "svn copy" (again).
        path_in_svn = in_svn(path_offset, prefix=prefix+" ")
        log_entry = svnclient.get_last_svn_log_entry(path_offset, 1, 'HEAD', get_changed_paths=False) if in_svn(path_offset, require_in_repo=True, prefix=prefix+" ") else []
        if (not log_entry or (log_entry['revision'] != tgt_rev)):
            copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
            ui.status(prefix + ">> do_svn_add: svn_copy: Copy-from: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " copyfrom: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " p_copyfrom: %s", parent_copyfrom_path+"@"+str(parent_copyfrom_rev) if parent_copyfrom_path else "", level=ui.DEBUG, color='GREEN')
            if path_in_svn and \
               ((parent_copyfrom_path and copyfrom_path.startswith(parent_copyfrom_path)) and \
                (parent_copyfrom_rev and copyfrom_rev == parent_copyfrom_rev)):
                # When being called recursively, if this child entry has the same ancestor as
                # the parent, then no need to try to run another "svn copy".
                ui.status(prefix + ">> do_svn_add: svn_copy: Same ancestry as parent: %s",
                    parent_copyfrom_path+"@"+str(parent_copyfrom_rev),level=ui.DEBUG, color='GREEN')
            else:
                # Copy this path from the equivalent path+rev in the target repo, to create the
                # equivalent history.
                if parent_copyfrom_path:
                    # If we have a parent copy-from path, we mis-match that so display a status
                    # message describing the action we're mimic'ing. If path_in_svn, then this
                    # is logically a "replace" rather than an "add".
                    ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), source_base+"/"+path_offset, ancestors[1]['path']+"@"+str(copyfrom_rev), level=ui.VERBOSE)
                if path_in_svn:
                    # If local file is already under version-control, then this is a replace.
                    ui.status(prefix + ">> do_svn_add: pre-copy: local path already exists: %s", path_offset, level=ui.DEBUG, color='GREEN')
                    run_svn(["remove", "--force", path_offset])
                run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset])
                # Export the final version of this file/folder from the source repo, to make
                # sure we're up-to-date.
                export_paths = add_path(export_paths, path_offset)
        else:
            ui.status(prefix + ">> do_svn_add: Skipped 'svn copy': %s", path_offset, level=ui.DEBUG, color='GREEN')
    else:
        # Else, either this copy-from path has no ancestry back to source_url OR copyfrom_rev comes
        # before our initial source_start_rev (i.e. tgt_rev == None), so can't do a "svn copy".
        # Create (parent) directory if needed.
        # TODO: This is (nearly) a duplicate of code in process_svn_log_entry(). Should this be
        #       split-out to a shared helper?
        p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or '.'
        if not os.path.exists(p_path):
            run_svn(["mkdir", p_path])
        if not in_svn(path_offset, prefix=prefix+" "):
            if is_dir:
                # Export the final version of all files in this folder.
                export_paths = add_path(export_paths, path_offset)
            else:
                # Export the final version of this file. We *need* to do this before running
                # the "svn add", even if we end-up re-exporting this file again via export_paths.
                run_svn(["export", "--force", "-r", source_rev,
                         source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])
            # If not already under version-control, then "svn add" this file/folder.
            run_svn(["add", "--parents", path_offset])
        # TODO: Need to copy SVN properties from source repos
    if is_dir:
        # For any folders that we process, process any child contents, so that we correctly
        # replay copies/replaces/etc.
        do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url,
                       copyfrom_path, copyfrom_rev, export_paths, rev_map, prefix+" ")
432
def do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url,
                   parent_copyfrom_path, parent_copyfrom_rev, export_paths, rev_map, prefix=""):
    """
    Replay the contents of an add'd folder, one child entry at a time.

    Lists the children of 'path_offset' both in the local working copy and in
    the source repo at 'source_rev', calls do_svn_add() for every remote child,
    and "svn remove"s every local child that is missing from the remote list.
    The remaining arguments are passed through to do_svn_add().
    """
    source_base = source_url[len(source_repos_url):]
    # Directory contents of the local WC (target) vs. the remote repo (source).
    # TODO: local_children won't include add'd paths because "svn ls" lists the contents of the
    #       associated remote repo folder. (Is this a problem?)
    local_children = get_svn_dirlist(path_offset)
    remote_children = get_svn_dirlist(source_url+"/"+path_offset, source_rev)
    ui.status(prefix + ">> do_svn_add_dir: paths_local: %s", str(local_children), level=ui.DEBUG, color='GREEN')
    ui.status(prefix + ">> do_svn_add_dir: paths_remote: %s", str(remote_children), level=ui.DEBUG, color='GREEN')
    # Add/update everything that exists remotely.
    for child in remote_children:
        # A trailing slash marks a folder entry in the listing.
        child_is_dir = (child[-1] == "/")
        child_name = child.rstrip('/') if child_is_dir else child
        do_svn_add(source_repos_url, source_url, path_offset+"/"+child_name, target_url, source_rev,
                   parent_copyfrom_path, parent_copyfrom_rev, export_paths,
                   rev_map, child_is_dir, prefix+" ")
    # Remove everything that exists locally but no longer remotely.
    for child in local_children:
        if child in remote_children:
            continue
        ui.status(" %s %s", 'D', source_base+"/"+path_offset+"/"+child, level=ui.VERBOSE)
        run_svn(["remove", "--force", path_offset+"/"+child])
    # TODO: Does this handle deleted folders too? Wouldn't want to have a case
    #       where we only delete all files from folder but leave orphaned folder around.
457
def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url,
                          rev_map, commit_paths = None, prefix = ""):
    """
    Process SVN changes from the given log entry.
    Returns array of all the paths in the working-copy that were changed,
    i.e. the paths which need to be "svn commit".

    'log_entry' is the array structure built by parse_svn_log_xml().
    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s.
    'commit_paths' is the working list of specific paths which changes to pass
      to the final "svn commit". It is appended to in place (and returned);
      a fresh list is used when omitted.
    'prefix' is prepended to the debug status messages.

    Raises UnsupportedSVNAction for a changed-path action other than M/A/R/D.
    """
    if commit_paths is None:
        # Never share one default list between calls: it is mutated below.
        commit_paths = []
    export_paths = []
    # Get the relative offset of source_url based on source_repos_url
    # e.g. '/branches/bug123'
    source_base = source_url[len(source_repos_url):]
    source_rev = log_entry['revision']
    ui.status(prefix + ">> process_svn_log_entry: %s", source_url+"@"+str(source_rev), level=ui.DEBUG, color='GREEN')
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not path.startswith(source_base + "/"):
            # Ignore changed files that are not part of this subdir
            if path != source_base:
                ui.status(prefix + ">> process_svn_log_entry: Unrelated path: %s (base: %s)", path, source_base, level=ui.DEBUG, color='GREEN')
            continue
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path. Compare against a tuple: a substring test
        # on 'MARD' would also accept '' or multi-letter values such as 'MA'.
        action = d['action']
        if action not in ('M', 'A', 'R', 'D'):
            raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                % (source_rev, action))
        ui.status(" %s %s%s", action, d['path'],
            (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
            level=ui.VERBOSE)

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        commit_paths = add_path(commit_paths, path_offset)

        # Special-handling for replace's
        if action == 'R':
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            path_is_dir = True if d['kind'] == 'dir' else False
            # Handle cases where this "add" was a copy from another URL in the source repos
            if d['copyfrom_revision']:
                do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev,
                           "", "", export_paths, rev_map, path_is_dir, prefix+" ")
            # Else just "svn export" the files from the source repo and "svn add" them.
            else:
                # Create (parent) directory if needed
                p_path = path_offset if path_is_dir else os.path.dirname(path_offset).strip() or '.'
                if not os.path.exists(p_path):
                    run_svn(["mkdir", p_path])
                # Export the entire added tree.
                if path_is_dir:
                    # For directories, defer the (recursive) "svn export". Might have a
                    # situation in a branch merge where the entry in the svn-log is a
                    # non-copy-from'd "add" but there are child contents (that we haven't
                    # gotten to yet in log_entry) that are copy-from's. When we try do
                    # the "svn copy" later on in do_svn_add() for those copy-from'd paths,
                    # having pre-existing (svn-add'd) contents creates some trouble.
                    # Instead, just create the stub folders ("svn mkdir" above) and defer
                    # exporting the final file-state until the end.
                    export_paths = add_path(export_paths, path_offset)
                else:
                    # Export the final version of this file. We *need* to do this before running
                    # the "svn add", even if we end-up re-exporting this file again via export_paths.
                    run_svn(["export", "--force", "-r", source_rev,
                             source_url+"/"+path_offset+"@"+str(source_rev), path_offset])
                if not in_svn(path_offset, prefix=prefix+" "):
                    # Need to use in_svn here to handle cases where client committed the parent
                    # folder and each indiv sub-folder.
                    run_svn(["add", "--parents", path_offset])
                # TODO: Need to copy SVN properties from source repos

        elif action == 'D':
            run_svn(["remove", "--force", path_offset])

        elif action == 'M':
            # TODO: Is "svn merge -c" correct here? Should this just be an "svn export" plus
            #       proplist updating?
            run_svn(["merge", "-c", source_rev, "--non-recursive",
                     "--non-interactive", "--accept=theirs-full",
                     source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

        else:
            # Defensive only: the action was validated above. Uses an exception
            # class this module actually imports (SVNError was never imported).
            raise UnsupportedSVNAction("Internal Error: process_svn_log_entry: Unhandled 'action' value: '%s'"
                % action)

    # Export the final version of all add'd paths from source_url
    for path_offset in export_paths:
        run_svn(["export", "--force", "-r", source_rev,
                 source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

    return commit_paths
577
def disp_svn_log_summary(log_entry):
    """
    Print a short "svn log"-style summary of the given log entry.

    Emits a blank line, a "rREV | author | date" header (date shown in the
    local timezone), the commit message and a separator line via ui.status().
    """
    ui.status("")
    revision = log_entry['revision']
    author = log_entry['author']
    committed_at = datetime.fromtimestamp(int(log_entry['date']))
    ui.status("r%s | %s | %s", revision, author, str(committed_at.isoformat(' ')))
    ui.status(log_entry['message'])
    ui.status("------------------------------------------------------------------------")
586
def pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url, target_url, rev_map, keep_author=False):
    """
    Replay a single source log entry into the target working copy and commit it.

    Displays the log summary, applies the entry's changes via
    process_svn_log_entry(), then commits with the svn2svn source-tracking
    revprops attached. Returns whatever commit_from_svn_log_entry() returns
    (the new target revision).
    """
    disp_svn_log_summary(log_entry)
    source_rev = log_entry['revision']

    # Apply every changed path; the list is filled in place with the paths to commit.
    changed = []
    process_svn_log_entry(log_entry, source_repos_url, source_url, target_url,
                          rev_map, changed)
    # With too many individual paths, pass none and let the commit run at the
    # root of the working copy instead.
    if len(changed) > 99:
        changed = []

    # Build source-tracking revprop's
    tracking = gen_tracking_revprops(source_repos_uuid, source_url, source_rev)
    return commit_from_svn_log_entry(log_entry, changed,
                                     keep_author=keep_author, target_revprops=tracking)
608
def run_parser(parser):
    """
    Register the common command-line options on 'parser' and parse sys.argv.

    Handles --help and --version itself (prints and exits with status 0),
    scales a "-v" count below 10 up to a ui level, pushes the options into
    ui.update_config() and returns the (options, args) pair.
    """
    parser.add_option("", "--version", dest="show_version", action="store_true",
                      help="show version and exit")
    # Swap optparse's built-in --help for our own flag, handled below.
    parser.remove_option("--help")
    common_options = (
        (("-h", "--help"),
         dict(dest="show_help", action="store_true",
              help="show this help message and exit")),
        (("-v", "--verbose"),
         dict(dest="verbosity", action="count", default=1,
              help="enable additional output (use -vv or -vvv for more)")),
        (("--debug",),
         dict(dest="verbosity", const=ui.DEBUG, action="store_const",
              help="enable debugging output (same as -vvv)")),
    )
    for flags, settings in common_options:
        parser.add_option(*flags, **settings)
    options, args = parser.parse_args()
    if options.show_help:
        parser.print_help()
        sys.exit(0)
    if options.show_version:
        print("%s %s" % (os.path.basename(sys.argv[0]), full_version))
        sys.exit(0)
    if options.verbosity < 10:
        # Expand multiple "-v" arguments to a real ui._level value
        options.verbosity = options.verbosity * 10
    ui.update_config(options)
    return options, args
635
def display_parser_error(parser, message):
    """
    Report a command-line usage error and exit.

    Prints "error: <message>", a blank line and the parser's help text, then
    terminates the process with exit status 1.
    """
    print("error: %s" % (message,))
    print("")
    parser.print_help()
    sys.exit(1)
644
def real_main(options, args):
    """
    Replay the history of a source SVN URL into a target SVN URL.

    `args` must hold the source URL followed by the target URL; both are
    popped from the list (so `args` is mutated) and stripped of trailing
    slashes. From `options` this function reads `keep_author`,
    `cont_from_break` and `svn_rev`.

    A working copy of the target is kept in ./_wc_target (resolved against
    the current directory at call time) and the process changes directory
    into it. Unless running in continue-mode, an existing working copy is
    deleted and checked-out again, and the top-level contents of the source
    at the start revision are exported, added and committed as the initial
    import. In continue-mode the source->target revision map is rebuilt with
    build_rev_map() and replay resumes after the source revision that maps
    to the highest target revision.

    Raises RuntimeError if no start revision can be found for the source
    URL, or if continue-mode is requested but the target has no replayed
    history. Raises AssertionError if either URL does not start with the
    repository root reported for it. Errors raised while replaying
    revisions (including Ctrl-C) are reported on standard output and not
    re-raised; the working copy is cleaned-up, reverted and updated before
    returning. Returns None.
    """
    source_url = args.pop(0).rstrip("/")
    target_url = args.pop(0).rstrip("/")
    keep_author = bool(options.keep_author)

    # Make sure that both the source and target URL's are valid
    source_info = svnclient.get_svn_info(source_url)
    assert source_url.startswith(source_info['repos_url'])
    target_info = svnclient.get_svn_info(target_url)
    assert target_url.startswith(target_info['repos_url'])

    source_end_rev = source_info['revision']       # Last revision # for the source repo
    source_repos_url = source_info['repos_url']    # Base URL for the source repo, e.g. 'svn://svn.example.com/svn/repo'
    source_repos_uuid = source_info['repos_uuid']  # UUID for the source repo

    wc_target = os.path.abspath('_wc_target')
    rev_map = {}

    # Check out a working copy of target_url if needed
    wc_exists = os.path.exists(wc_target)
    if wc_exists and not options.cont_from_break:
        shutil.rmtree(wc_target)
        wc_exists = False
    if not wc_exists:
        svnclient.svn_checkout(target_url, wc_target)
    os.chdir(wc_target)

    if not options.cont_from_break:
        # Get log entry for the SVN revision we will check out.
        # Initialised up-front so a fruitless search reaches the RuntimeError
        # below instead of failing on an unbound local.
        source_start_log = None
        if options.svn_rev:
            # If specify a rev, get log entry just before or at rev
            source_start_log = svnclient.get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
        else:
            # Otherwise, get log entry of branch creation
            # Note: Trying to use svnclient.get_first_svn_log_entry(source_url, 1, source_end_rev, False)
            # ends-up being *VERY* time-consuming on a repo with lots of revisions. Even though
            # the "svn log" call is passing --limit 1, it seems like that limit-filter is happening
            # _after_ svn has fetched the full log history. Instead, search the history in chunks
            # and write some progress to the screen.
            ui.status("Searching for start source revision (%s)...", source_url, level=ui.VERBOSE)
            rev = 1
            chunk_size = 1000
            # The search walks the *source* history, so it is bounded by the
            # source repo's last revision (not the target repo's).
            while rev <= source_end_rev:
                chunk_end = min(rev+chunk_size-1, source_end_rev)
                entries = svnclient.run_svn_log(source_url, rev, chunk_end, 1, get_changed_paths=False)
                if entries:
                    source_start_log = entries[0]
                    break
                ui.status("...%s...", rev)
                rev = rev+chunk_size
        if not source_start_log:
            raise RuntimeError("Unable to find first revision for source_url: %s" % source_url)

        # This is the revision we will start from for source_url
        source_start_rev = source_rev = int(source_start_log['revision'])
        ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        paths = run_svn(["list", "-r", source_rev, source_url+"@"+str(source_rev)])
        if len(paths)>1:
            disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_url, source_rev, source_rev))
            ui.status("(Initial import)", level=ui.VERBOSE)
            paths = paths.strip("\n").split("\n")
            for path_raw in paths:
                # For each top-level file/folder...
                if not path_raw:
                    continue
                # Directories have a trailing slash in the "svn list" output
                path_is_dir = path_raw.endswith("/")
                path = path_raw.rstrip('/') if path_is_dir else path_raw
                if path_is_dir and not os.path.exists(path):
                    os.makedirs(path)
                ui.status(" A %s", source_url[len(source_repos_url):]+"/"+path, level=ui.VERBOSE)
                run_svn(["export", "--force", "-r" , source_rev, source_url+"/"+path+"@"+str(source_rev), path])
                run_svn(["add", path])
            target_revprops = gen_tracking_revprops(source_repos_uuid, source_url, source_rev)  # Build source-tracking revprop's
            target_rev = commit_from_svn_log_entry(source_start_log,
                keep_author=keep_author, target_revprops=target_revprops)
            if target_rev:
                set_rev_map(rev_map, source_rev, target_rev)
    else:
        # Re-build the rev_map based on any already-replayed history in target_url
        rev_map = build_rev_map(target_url, source_info)
        if not rev_map:
            raise RuntimeError("Called with continue-mode, but no already-replayed history found in target repo: %s" % target_url)
        # Source revision whose mapped target revision is the highest
        source_start_rev = int(max(rev_map, key=rev_map.get))
        assert source_start_rev
        ui.status("Continuing from source revision %s.", source_start_rev, level=ui.VERBOSE)

    commit_count = 0
    svn_vers_t = svnclient.get_svn_client_version()
    # Compare (major, minor) as integers: formatting them as a float would
    # turn 1.10 into 1.1 and wrongly rank it below 1.7.
    svn_has_pristines = tuple(int(part) for part in svn_vers_t[0:2]) >= (1, 7)

    # Load SVN log starting from source_start_rev + 1
    it_log_entries = svnclient.iter_svn_log_entries(source_url, source_start_rev+1, source_end_rev, get_revprops=True)

    try:
        for log_entry in it_log_entries:
            # Replay this revision from source_url into target_url
            target_rev = pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url,
                                      target_url, rev_map, keep_author)
            # Update our target working-copy, to ensure everything says it's at the new HEAD revision
            run_svn(["update"])
            commit_count += 1
            # Run "svn cleanup" every 100 commits if SVN 1.7+, to clean-up orphaned ".svn/pristines/*"
            if svn_has_pristines and (commit_count % 100 == 0):
                run_svn(["cleanup"])
            # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
            if target_rev:
                source_rev = log_entry['revision']
                set_rev_map(rev_map, source_rev, target_rev)

    except KeyboardInterrupt:
        # Single-argument print(...) emits the same text whether print is a
        # statement or a function.
        print("\nStopped by user.")
        run_svn(["cleanup"])
        full_svn_revert()
    except:
        # Deliberately broad: report the failure, then leave the working copy
        # clean and reverted rather than propagating the error.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        run_svn(["cleanup"])
        print(run_svn(["status"]))
        full_svn_revert()
    finally:
        run_svn(["update"])
        print("\nFinished!")
777
def main():
    """
    Command-line entry point: parse the arguments and start the replay.

    Builds an OptionParser with the usage text below, registers the
    -r/--revision, -a/--keep-author and -c/--continue options, and lets
    run_parser() add the common options and parse the command line.
    Exactly two positional arguments (source_url and target_url) are
    required; otherwise display_parser_error() is called with
    "incorrect number of arguments". Returns whatever real_main() returns.
    """
    # Defined as entry point. Must be callable without arguments.
    # NOTE(review): leading whitespace inside this help text was not visible
    # in the reviewed copy -- confirm the layout against the repository file.
    usage = """Usage: %prog [OPTIONS] source_url target_url

Replicate (replay) history from one SVN repository to another. Maintain
logical ancestry wherever possible, so that 'svn log' on the replayed
repo will correctly follow file/folder renames.

== Examples ==
Create a copy of only /trunk from source repo, starting at r5000
$ svnadmin create /svn/target
$ svn mkdir -m 'Add trunk' file:///svn/target/trunk
$ svn2svn -av -r 5000 http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target
2. The first commit to http://server/source/trunk at/after r5000 will be
exported & added into _wc_target
3. All revisions affecting http://server/source/trunk (starting at r5000)
will be replayed to _wc_target. Any add/copy/move/replaces that are
copy-from'd some path outside of /trunk (e.g. files renamed on a /branch
and branch was merged into /trunk) will correctly maintain logical
ancestry where possible.

Use continue-mode (-c) to pick-up where the last run left-off
$ svn2svn -avc http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target, if not already
checked-out
2. All new revisions affecting http://server/source/trunk starting from
the last replayed revision to file:///svn/target/trunk (based on the
svn2svn:* revprops) will be replayed to _wc_target, maintaining all
logical ancestry where possible."""
    parser = OptionParser(usage)
    # Options specific to this command; the common ones are added by run_parser().
    parser.add_option("-r", "--revision", type="int", dest="svn_rev", metavar="REV",
                      help="initial SVN revision to start source_url replay")
    parser.add_option("-a", "--keep-author", action="store_true", dest="keep_author",
                      help="maintain original Author info from source repo")
    parser.add_option("-c", "--continue", action="store_true", dest="cont_from_break",
                      help="continue from previous break")
    (options, args) = run_parser(parser)
    if len(args) != 2:
        # display_parser_error() exits the process, so execution stops here.
        display_parser_error(parser, "incorrect number of arguments")
    return real_main(options, args)
819
820
if __name__ == "__main__":
    # Script entry: a falsy result from main() is reported as exit status 0.
    exit_status = main() or 0
    sys.exit(exit_status)