]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/run/svn2svn.py
Migrate shared code to commit_from_svn_log_entry()
[svn2svn.git] / svn2svn / run / svn2svn.py
1 """
2 Replicate (replay) changesets from one SVN repository to another:
3 * Maintains full logical history (e.g. uses "svn copy" for renames).
4 * Maintains original commit messages.
5 * Optionally maintain source author info. (Only supported if accessing
6 target SVN repo via file://)
7 * Cannot maintain original commit date, but appends original commit date
8 for each commit message: "Date: %d".
9 * Optionally run an external shell script before each replayed commit
10 to give the ability to dynamically exclude or modify files as part
11 of the replay.
12
13 License: GPLv3, same as hgsvn (https://bitbucket.org/andialbrecht/hgsvn)
14 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
15 (Inspired by http://code.google.com/p/svn2svn/, and uses code for hgsvn
16 for SVN client handling)
17 """
18
19 from .. import base_version, full_version
20 from .. import ui
21 from .. import svnclient
22 from ..shell import run_svn
23 from ..errors import (ExternalCommandFailed, UnsupportedSVNAction)
24
25 import sys
26 import os
27 import time
28 import traceback
29 import shutil
30 import operator
31 from optparse import OptionParser,OptionGroup
32 from datetime import datetime
33
def commit_from_svn_log_entry(log_entry, options, commit_paths=None, target_revprops=None):
    """
    Given an SVN log entry and an optional list of changed paths, do an svn commit.

    'log_entry' is the source log entry; its 'date', 'message', 'author' and
      'revprops' values are read here.
    'options' must provide a 'keep_author' attribute. When true, the commit is
      run with "--username <source author>"; otherwise the source author is
      appended to the commit message instead.
    'commit_paths' is an optional list of working-copy paths to pass to
      "svn commit". When it is empty, or holds 100 or more entries, no paths
      are passed at all.
    'target_revprops' is an optional list of {'name': ..., 'value': ...} dicts
      to set on the new commit. They override same-named source revprops.

    Returns the committed revision number as a string, "" if "svn commit"
    produced output without a "Committed revision" line, or None if it
    produced no output.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # Display the _wc_target "svn status" info if running in -vv (or higher) mode
    if ui.get_level() >= ui.EXTRA:
        ui.status(">> commit_from_svn_log_entry: Pre-commit _wc_target status:", level=ui.EXTRA, color='CYAN')
        ui.status(run_svn(["status"]), level=ui.EXTRA, color='CYAN')
    # This will use the local timezone for displaying commit times
    timestamp = int(log_entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    # Uncomment this one if you prefer UTC commit times
    #svn_date = "%d 0" % timestamp
    message = log_entry['message'] + "\nDate: " + svn_date
    args = ["commit", "--force-log"]
    if options.keep_author:
        args += ["-m", message, "--username", log_entry['author']]
    else:
        args += ["-m", message + "\nAuthor: " + log_entry['author']]
    # Carry forward any revprops from the source revision first, then layer
    # the extra target-repo revprops on top so that they win on a name clash.
    revprops = {}
    for revprop_list in (log_entry['revprops'], target_revprops):
        for v in revprop_list or []:
            revprops[v['name']] = v['value']
    for key in revprops:
        args += ["--with-revprop", "%s=%s" % (key, str(revprops[key]))]
    if commit_paths and len(commit_paths) < 100:
        # If we don't have an excessive amount of individual changed paths, pass
        # those to the "svn commit" command. Else, pass nothing so we commit at
        # the root of the working-copy.
        args += list(commit_paths)
    # Actually run the commit. Without this call 'output' was never assigned
    # and the parsing below failed with a NameError.
    output = run_svn(args)
    rev = None
    if output:
        rev = ""
        for line in output.strip("\n").split("\n"):
            if line.startswith('Committed revision '):
                rev = line[19:].rstrip('.')
                break
        if rev:
            ui.status("Committed revision %s.", rev)
    return rev
82
def full_svn_revert():
    """
    Do an "svn revert" and proactively remove any extra files in the working copy.

    Runs "svn revert --recursive ." in the current directory, then deletes
    every path that "svn status" still reports as unversioned ("?").
    """
    run_svn(["revert", "--recursive", "."])
    output = run_svn(["status"])
    if not output:
        return
    for line in output.strip("\n").split("\n"):
        # startswith() also copes with blank lines, where line[0] would raise.
        if not line.startswith("?"):
            continue
        path = line[4:].strip(" ")
        if os.path.islink(path) or os.path.isfile(path):
            # Remove symlinks with os.remove(), even when they point at a
            # directory: shutil.rmtree() refuses to operate on a symlink.
            os.remove(path)
        elif os.path.isdir(path):
            shutil.rmtree(path)
98
def gen_tracking_revprops(source_repos_uuid, source_url, source_rev):
    """
    Build the list of svn2svn-specific source-tracking revprops.

    Returns a list of {'name': ..., 'value': ...} dicts, in the order
    source_uuid, source_url, source_rev.
    """
    tracked = (('svn2svn:source_uuid', source_repos_uuid),
               ('svn2svn:source_url', source_url),
               ('svn2svn:source_rev', source_rev))
    return [{'name': prop_name, 'value': prop_value} for prop_name, prop_value in tracked]
107
def in_svn(p, require_in_repo=False, prefix=""):
    """
    Check if a given file/folder is being tracked by Subversion.

    Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn"
    directories. With SVN 1.7 and beyond, WC-NG means only a single top-level
    ".svn" at the root of the working-copy, so "svn status" is used instead.

    'p' is the working-copy path to check.
    'require_in_repo', when true, makes locally-added paths (status 'added' or
      no revision) count as *not* tracked.
    'prefix' is prepended to the debug status message.

    Returns False when "svn status" reports no entry for the path.
    """
    status_entries = svnclient.get_svn_status(p, no_recursive=True)
    if not status_entries:
        return False
    entry = status_entries[0]
    if require_in_repo and (entry['status'] == 'added' or entry['revision'] is None):
        # The caller requires this path to exist in the SVN repo, so a path
        # which is only locally-added does not qualify.
        ret = False
    elif entry['status'] == 'deleted':
        # Files tracked as deleted in the WC are not under source-control.
        ret = False
    else:
        # Files which are locally added/copied do count as under source-control.
        ret = (entry['type'] == 'normal' or entry['status'] == 'added' or entry['copied'] == 'true')
    ui.status(prefix + ">> in_svn('%s', require_in_repo=%s) --> %s", p, str(require_in_repo), str(ret), level=ui.DEBUG, color='GREEN')
    return ret
129
def _is_path_or_parent(candidate, path):
    """
    Return True if 'candidate' is 'path' itself or one of its parent folders.
    Whole path components are compared, so '/trunk/a' is not treated as a
    parent of '/trunk/ab/file'.
    """
    parent = candidate.rstrip("/")
    return path == parent or path.startswith(parent + "/")

def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix = ""):
    """
    Given a source path, walk the SVN history backwards to inspect the ancestry of
    that path, seeing if it traces back to base_path. Build an array of copyfrom_path
    and copyfrom_revision pairs for each of the "svn copies". If we find a copyfrom_path
    which starts with base_path (e.g. we crawled back to the initial branch-
    copy from trunk), then return the collection of ancestor paths. Otherwise,
    copyfrom_path has no ancestry compared to base_path.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'svn_repos_url' is the full URL to the root of the SVN repository,
      e.g. 'file:///path/to/repo'
    'base_path' is the path in the SVN repo to the target path we're trying to
      trace ancestry back to, e.g. 'trunk'.
    'source_path' is the path in the SVN repo to the source path to start checking
      ancestry at, e.g. 'branches/fix1/projectA/file1.txt'.
      (full_path = svn_repos_url+base_path+"/"+path_offset)
    'source_rev' is the revision to start walking the history of source_path backwards from.
    'prefix' is prepended to every debug status message.

    Returns a list of {'path': ..., 'revision': ...} dicts, starting with the
    source path itself and ending with the oldest ancestor found, or an empty
    list when no copy-from chain was found.

    Raises UnsupportedSVNAction for a changed-path action other than M/A/R/D.
    """
    ui.status(prefix + ">> find_svn_ancestors: Start: (%s) source_path: %s base_path: %s",
        svn_repos_url, source_path+"@"+str(source_rev), base_path, level=ui.DEBUG, color='YELLOW')
    done = False
    working_path = base_path+"/"+source_path
    working_rev = source_rev
    first_iter_done = False
    ancestors_temp = []
    while not done:
        # Get the first "svn log" entry for this path (relative to @rev)
        ui.status(prefix + ">> find_svn_ancestors: %s", svn_repos_url + working_path+"@"+str(working_rev), level=ui.DEBUG, color='YELLOW')
        log_entry = svnclient.get_first_svn_log_entry(svn_repos_url + working_path+"@"+str(working_rev), 1, working_rev, True)
        if not log_entry:
            ui.status(prefix + ">> find_svn_ancestors: Done: no log_entry", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        # If we found a copy-from case which matches our base_path, we're done.
        # ...but only if we've at least tried to search for the first copy-from path.
        if first_iter_done and working_path.startswith(base_path):
            ui.status(prefix + ">> find_svn_ancestors: Done: Found working_path.startswith(base_path) and first_iter_done=True", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        first_iter_done = True
        # Search for any actions on our target path (or parent paths). Match on
        # whole path components: a plain substring test would also pick up
        # unrelated siblings which merely share a name prefix.
        changed_paths_temp = []
        for d in log_entry['changed_paths']:
            path = d['path']
            if _is_path_or_parent(path, working_path):
                changed_paths_temp.append({'path': path, 'data': d})
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the chain and this path has no ancestry back to base_path.
            ui.status(prefix + ">> find_svn_ancestors: Done: No matching changed_paths", level=ui.DEBUG, color='YELLOW')
            done = True
            continue
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
        # Find the action for our working_path in this revision. Use a loop to check in reverse order,
        # so that we still find the copy when the target file/folder is "M" but a parent folder
        # has an "A" copy-from.
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            action = d['action']
            if action not in ('M', 'A', 'R', 'D'):
                raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                    % (log_entry['revision'], action))
            ui.status(prefix + "> %s %s%s", action, path,
                (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                level=ui.DEBUG, color='YELLOW')
            if action == 'D':
                # If file/folder was deleted, it has no ancestor
                ancestors_temp = []
                ui.status(prefix + ">> find_svn_ancestors: Done: deleted", level=ui.DEBUG, color='YELLOW')
                done = True
                break
            if action in ('R', 'A'):
                # If file/folder was added/replaced but not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    ancestors_temp = []
                    ui.status(prefix + ">> find_svn_ancestors: Done: %s with no copyfrom_path",
                        "Added" if action == "A" else "Replaced",
                        level=ui.DEBUG, color='YELLOW')
                    done = True
                    break
                # Else, file/folder was added/replaced and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                ui.status(prefix + ">> find_svn_ancestors: Found copy-from (action=%s): %s --> %s",
                    action, path, d['copyfrom_path']+"@"+str(d['copyfrom_revision']),
                    level=ui.DEBUG, color='YELLOW')
                ancestors_temp.append({'path': path, 'revision': log_entry['revision'],
                    'copyfrom_path': d['copyfrom_path'], 'copyfrom_rev': d['copyfrom_revision']})
                # 'path' is a leading part of working_path (checked above), so swap
                # only that leading part; str.replace() would also rewrite any later
                # occurrence of the same text inside the path.
                working_path = d['copyfrom_path'] + working_path[len(path):]
                working_rev = d['copyfrom_revision']
                # Follow the copy and keep on searching
                break
        else:
            # Only modifications matched, so working_path/working_rev did not change.
            # Stop here rather than re-querying the same log entry forever.
            ui.status(prefix + ">> find_svn_ancestors: Done: No add/replace/delete found for working_path", level=ui.DEBUG, color='YELLOW')
            done = True
    ancestors = []
    if ancestors_temp:
        ancestors.append({'path': base_path+"/"+source_path, 'revision': source_rev})
        # Replay the recorded copies from the starting path to rebuild each ancestor path.
        working_path = base_path+"/"+source_path
        for d in ancestors_temp:
            working_path = d['copyfrom_path'] + working_path[len(d['path']):]
            working_rev = d['copyfrom_rev']
            ancestors.append({'path': working_path, 'revision': working_rev})
        if ui.get_level() >= ui.DEBUG:
            max_len = 0
            for d in ancestors:
                max_len = max(max_len, len(d['path']+"@"+str(d['revision'])))
            ui.status(prefix + ">> find_svn_ancestors: Found parent ancestors:", level=ui.DEBUG, color='YELLOW_B')
            for idx in range(len(ancestors)-1):
                d = ancestors[idx]
                d_next = ancestors[idx+1]
                ui.status(prefix + " [%s] %s <-- %s", idx,
                    str(d['path']+"@"+str(d['revision'])).ljust(max_len),
                    str(d_next['path']+"@"+str(d_next['revision'])).ljust(max_len),
                    level=ui.DEBUG, color='YELLOW')
    else:
        ui.status(prefix + ">> find_svn_ancestors: No ancestor-chain found: %s",
            svn_repos_url+base_path+"/"+source_path+"@"+str(source_rev), level=ui.DEBUG, color='YELLOW')
    return ancestors
251
def get_rev_map(rev_map, source_rev, prefix):
    """
    Find the equivalent rev # in the target repo for the given rev # from the source repo.

    Walks downwards from 'source_rev' and returns the mapped target revision
    (as an int) for the highest source revision <= source_rev that is present
    in 'rev_map'. Returns None if no such entry exists.
    """
    ui.status(prefix + ">> get_rev_map(%s)", source_rev, level=ui.DEBUG, color='GREEN')
    candidate = int(source_rev)
    while candidate > 0:
        is_mapped = candidate in rev_map
        ui.status(prefix + ">> get_rev_map: rev=%s in_rev_map=%s", candidate, str(is_mapped), level=ui.DEBUG, color='BLACK_B')
        if is_mapped:
            return int(rev_map[candidate])
        candidate -= 1
    # We fell off the bottom of the rev_map. Ruh-roh...
    return None
264
def set_rev_map(rev_map, source_rev, target_rev):
    """
    Record in 'rev_map' that 'source_rev' (source repo) maps to 'target_rev'
    (target repo). Both revisions are stored as ints.
    """
    ui.status(">> set_rev_map: source_rev=%s target_rev=%s", source_rev, target_rev, level=ui.DEBUG, color='GREEN')
    mapped_rev = int(target_rev)
    rev_map[int(source_rev)] = mapped_rev
268
def build_rev_map(target_url, source_info):
    """
    Check for any already-replayed history from source_url (source_info) and
    build the mapping-table of source_rev -> target_rev.

    'target_url' is the URL whose log (revision 1 through HEAD) is scanned.
    'source_info' must provide the 'repos_uuid' and 'url' of the source.

    Only target revisions carrying all three svn2svn tracking revprops, with
    a source_uuid and source_url matching 'source_info', are added to the map.
    Returns the rev_map dictionary.
    """
    required = ('svn2svn:source_uuid', 'svn2svn:source_url', 'svn2svn:source_rev')
    rev_map = {}
    ui.status("Rebuilding rev_map...", level=ui.VERBOSE)
    it_log_entries = svnclient.iter_svn_log_entries(target_url, 1, 'HEAD', get_changed_paths=False, get_revprops=True)
    for log_entry in it_log_entries:
        if not log_entry['revprops']:
            continue
        revprops = dict((v['name'], v['value']) for v in log_entry['revprops']
                        if v['name'].startswith('svn2svn:'))
        # Skip revisions that have revprops but were not created by svn2svn
        # (or lack part of the tracking info) instead of raising KeyError.
        if not all(name in revprops for name in required):
            continue
        if revprops['svn2svn:source_uuid'] == source_info['repos_uuid'] and \
           revprops['svn2svn:source_url'] == source_info['url']:
            source_rev = revprops['svn2svn:source_rev']
            target_rev = log_entry['revision']
            set_rev_map(rev_map, source_rev, target_rev)
    return rev_map
290
def get_svn_dirlist(svn_path, svn_rev = ""):
    """
    Get the child entries of the given folder path, as printed by "svn list".

    No recursive flag is passed to "svn list". When 'svn_rev' is given it is
    used both as the operative revision ("-r") and as the peg revision
    ("@rev"). Returns a list with one string per output line, or an empty
    list when the command output is at most one character long.
    """
    if svn_rev:
        args = ["list", "-r", svn_rev, svn_path + "@" + str(svn_rev)]
    else:
        args = ["list", svn_path]
    listing = run_svn(args, no_fail=True)
    if len(listing) > 1:
        return listing.strip("\n").split("\n")
    return []
304
def add_path(paths, path_offset):
    """
    Helper function to add a path to a list but only if that path or a parent
    path isn't already in the list. Assumes that paths are added roughly in
    breadth-first order.

    Paths are compared by whole components: 'foo2' is NOT considered covered
    by 'foo', whereas 'foo/bar' is. An empty entry ('') stands for the root
    and covers every path.

    'paths' is modified in place and also returned.
    """
    for p in paths:
        parent = p.rstrip("/")
        if not parent or path_offset == parent or path_offset.startswith(parent + "/"):
            # Already covered by an existing entry.
            return paths
    paths.append(path_offset)
    return paths
319
def do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev,
               parent_copyfrom_path="", parent_copyfrom_rev="", export_paths=None,
               rev_map=None, is_dir = False, prefix = ""):
    """
    Given the add'd source path, replay the "svn add/copy" commands to correctly
    track renames across copy-from's.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2  (rename folder)
    4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt  (rename file inside renamed folder)
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'path_offset' is the offset from source_base to the file to check ancestry for,
      e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'target_url' is the full URL to the target path in the target repository.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'parent_copyfrom_path' and 'parent_copyfrom_rev' is the copy-from path of the parent
      directory, when being called recursively by do_svn_add_dir().
    'export_paths' is the list of path_offset's that we've deferred running "svn export" on.
      It is extended in place; a fresh list is used when omitted.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s. An empty map is used when omitted.
    'is_dir' is whether path_offset is a directory (rather than a file).
    'prefix' is prepended to debug status messages.
    """
    # Never share mutable defaults between calls. (The former "{}" default for
    # export_paths could not even be appended to by add_path().)
    if export_paths is None:
        export_paths = []
    if rev_map is None:
        rev_map = {}
    source_base = source_url[len(source_repos_url):]
    ui.status(prefix + ">> do_svn_add: %s %s", source_base+"/"+path_offset+"@"+str(source_rev),
        " (parent-copyfrom: "+parent_copyfrom_path+"@"+str(parent_copyfrom_rev)+")" if parent_copyfrom_path else "",
        level=ui.DEBUG, color='GREEN')
    # Check if the given path has ancestors which chain back to the current source_base
    found_ancestor = False
    tgt_rev = None
    ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset, source_rev, prefix+" ")
    # ancestors[n] is the original (pre-branch-copy) trunk path.
    # ancestors[n-1] is the first commit on the new branch.
    copyfrom_path = ancestors[-1]['path'] if ancestors else ""
    copyfrom_rev = ancestors[-1]['revision'] if ancestors else ""
    if ancestors:
        # The copy-from path has ancestry back to source_url.
        ui.status(prefix + ">> do_svn_add: Check copy-from: Found parent: %s", copyfrom_path+"@"+str(copyfrom_rev),
            level=ui.DEBUG, color='GREEN', bold=True)
        found_ancestor = True
        # Map the copyfrom_rev (source repo) to the equivalent target repo rev #. This can
        # return None in the case where copyfrom_rev is *before* our source_start_rev.
        tgt_rev = get_rev_map(rev_map, copyfrom_rev, prefix+" ")
        ui.status(prefix + ">> do_svn_add: get_rev_map: %s (source) -> %s (target)", copyfrom_rev, tgt_rev, level=ui.DEBUG, color='GREEN')
    else:
        ui.status(prefix + ">> do_svn_add: Check copy-from: No ancestor chain found.", level=ui.DEBUG, color='GREEN')
    if found_ancestor and tgt_rev:
        # Check if this path_offset in the target WC already has this ancestry, in which
        # case there's no need to run the "svn copy" (again).
        path_in_svn = in_svn(path_offset, prefix=prefix+" ")
        log_entry = svnclient.get_last_svn_log_entry(path_offset, 1, 'HEAD', get_changed_paths=False) if in_svn(path_offset, require_in_repo=True, prefix=prefix+" ") else []
        if (not log_entry or (log_entry['revision'] != tgt_rev)):
            copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
            ui.status(prefix + ">> do_svn_add: svn_copy: Copy-from: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " copyfrom: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " p_copyfrom: %s", parent_copyfrom_path+"@"+str(parent_copyfrom_rev) if parent_copyfrom_path else "", level=ui.DEBUG, color='GREEN')
            if path_in_svn and \
               ((parent_copyfrom_path and copyfrom_path.startswith(parent_copyfrom_path)) and \
                (parent_copyfrom_rev and copyfrom_rev == parent_copyfrom_rev)):
                # When being called recursively, if this child entry has the same ancestor as
                # the parent, then no need to try to run another "svn copy".
                ui.status(prefix + ">> do_svn_add: svn_copy: Same ancestry as parent: %s",
                    parent_copyfrom_path+"@"+str(parent_copyfrom_rev),level=ui.DEBUG, color='GREEN')
            else:
                # Copy this path from the equivalent path+rev in the target repo, to create the
                # equivalent history.
                if parent_copyfrom_path:
                    # If we have a parent copy-from path, we mis-match that so display a status
                    # message describing the action we're mimicking. If path_in_svn, then this
                    # is logically a "replace" rather than an "add".
                    ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), source_base+"/"+path_offset, ancestors[1]['path']+"@"+str(copyfrom_rev), level=ui.VERBOSE)
                if path_in_svn:
                    # If local file is already under version-control, then this is a replace.
                    ui.status(prefix + ">> do_svn_add: pre-copy: local path already exists: %s", path_offset, level=ui.DEBUG, color='GREEN')
                    run_svn(["remove", "--force", path_offset])
                run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset])
                # Export the final version of this file/folder from the source repo, to make
                # sure we're up-to-date.
                export_paths = add_path(export_paths, path_offset)
        else:
            ui.status(prefix + ">> do_svn_add: Skipped 'svn copy': %s", path_offset, level=ui.DEBUG, color='GREEN')
    else:
        # Else, either this copy-from path has no ancestry back to source_url OR copyfrom_rev comes
        # before our initial source_start_rev (i.e. tgt_rev == None), so can't do a "svn copy".
        # Create (parent) directory if needed.
        # TODO: This is (nearly) a duplicate of code in process_svn_log_entry(). Should this be
        #       split-out to a shared helper?
        p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or '.'
        if not os.path.exists(p_path):
            run_svn(["mkdir", p_path])
        if not in_svn(path_offset, prefix=prefix+" "):
            if is_dir:
                # Export the final version of all files in this folder.
                export_paths = add_path(export_paths, path_offset)
            else:
                # Export the final version of this file. We *need* to do this before running
                # the "svn add", even if we end-up re-exporting this file again via export_paths.
                run_svn(["export", "--force", "-r", source_rev,
                         source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])
            # If not already under version-control, then "svn add" this file/folder.
            run_svn(["add", "--parents", path_offset])
        # TODO: Need to copy SVN properties from source repos
    if is_dir:
        # For any folders that we process, process any child contents, so that we correctly
        # replay copies/replaces/etc.
        do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url,
                       copyfrom_path, copyfrom_rev, export_paths, rev_map, prefix+" ")
437
def do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url,
                   parent_copyfrom_path, parent_copyfrom_rev, export_paths, rev_map, prefix=""):
    """
    Replay the contents of an added folder.

    Lists the folder both in the local working copy and in the source repo at
    'source_rev', calls do_svn_add() for every entry of the source listing, and
    "svn remove"s every local entry which is missing from the source listing.
    The remaining arguments are handed through to do_svn_add() unchanged.
    """
    source_base = source_url[len(source_repos_url):]
    # Get the directory contents, to compare between the local WC (target_url) vs. the remote repo (source_url)
    # TODO: The local listing won't include add'd paths because "svn ls" lists the contents of the
    #       associated remote repo folder. (Is this a problem?)
    local_entries = get_svn_dirlist(path_offset)
    remote_entries = get_svn_dirlist(source_url+"/"+path_offset, source_rev)
    ui.status(prefix + ">> do_svn_add_dir: paths_local: %s", str(local_entries), level=ui.DEBUG, color='GREEN')
    ui.status(prefix + ">> do_svn_add_dir: paths_remote: %s", str(remote_entries), level=ui.DEBUG, color='GREEN')
    # Update files/folders which exist in remote but not local
    for entry in remote_entries:
        # A trailing slash marks the entry as a folder.
        entry_is_dir = (entry[-1] == "/")
        child_name = entry.rstrip('/') if entry_is_dir else entry
        do_svn_add(source_repos_url, source_url, path_offset+"/"+child_name, target_url, source_rev,
                   parent_copyfrom_path, parent_copyfrom_rev, export_paths,
                   rev_map, entry_is_dir, prefix+" ")
    # Remove files/folders which exist in local but not remote
    for entry in local_entries:
        if entry in remote_entries:
            continue
        ui.status(" %s %s", 'D', source_base+"/"+path_offset+"/"+entry, level=ui.VERBOSE)
        run_svn(["remove", "--force", path_offset+"/"+entry])
        # TODO: Does this handle deleted folders too? Wouldn't want to have a case
        #       where we only delete all files from folder but leave orphaned folder around.
462
def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url,
                          rev_map, commit_paths = None, prefix = ""):
    """
    Process SVN changes from the given log entry.
    Returns array of all the paths in the working-copy that were changed,
    i.e. the paths which need to be "svn commit".

    'log_entry' is the array structure built by parse_svn_log_xml().
    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s.
    'commit_paths' is the working list of specific paths which changes to pass
      to the final "svn commit". A list passed in is extended in place; a fresh
      list is created when omitted.
    'prefix' is prepended to debug status messages.

    Raises UnsupportedSVNAction for a changed-path action other than M/A/R/D.
    """
    # Never share one default list between calls: it is mutated and returned.
    if commit_paths is None:
        commit_paths = []
    export_paths = []
    # Get the relative offset of source_url based on source_repos_url
    # e.g. '/branches/bug123'
    source_base = source_url[len(source_repos_url):]
    source_rev = log_entry['revision']
    ui.status(prefix + ">> process_svn_log_entry: %s", source_url+"@"+str(source_rev), level=ui.DEBUG, color='GREEN')
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not path.startswith(source_base + "/"):
            # Ignore changed files that are not part of this subdir
            if path != source_base:
                ui.status(prefix + ">> process_svn_log_entry: Unrelated path: %s (base: %s)", path, source_base, level=ui.DEBUG, color='GREEN')
            continue
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path. Compare against the exact action codes:
        # a substring test against 'MARD' would also accept '' or 'MA'.
        action = d['action']
        if action not in ('M', 'A', 'R', 'D'):
            raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                % (source_rev, action))
        ui.status(" %s %s%s", action, d['path'],
            (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
            level=ui.VERBOSE)

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        commit_paths = add_path(commit_paths, path_offset)

        # Special-handling for replace's
        if action == 'R':
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            path_is_dir = (d['kind'] == 'dir')
            # Handle cases where this "add" was a copy from another URL in the source repos
            if d['copyfrom_revision']:
                do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev,
                           "", "", export_paths, rev_map, path_is_dir, prefix+" ")
            # Else just "svn export" the files from the source repo and "svn add" them.
            else:
                # Create (parent) directory if needed
                p_path = path_offset if path_is_dir else os.path.dirname(path_offset).strip() or '.'
                if not os.path.exists(p_path):
                    run_svn(["mkdir", p_path])
                # Export the entire added tree.
                if path_is_dir:
                    # For directories, defer the (recursive) "svn export". Might have a
                    # situation in a branch merge where the entry in the svn-log is a
                    # non-copy-from'd "add" but there are child contents (that we haven't
                    # gotten to yet in log_entry) that are copy-from's. When we try do
                    # the "svn copy" later on in do_svn_add() for those copy-from'd paths,
                    # having pre-existing (svn-add'd) contents creates some trouble.
                    # Instead, just create the stub folders ("svn mkdir" above) and defer
                    # exporting the final file-state until the end.
                    export_paths = add_path(export_paths, path_offset)
                else:
                    # Export the final version of this file. We *need* to do this before running
                    # the "svn add", even if we end-up re-exporting this file again via export_paths.
                    run_svn(["export", "--force", "-r", source_rev,
                             source_url+"/"+path_offset+"@"+str(source_rev), path_offset])
                if not in_svn(path_offset, prefix=prefix+" "):
                    # Need to use in_svn here to handle cases where client committed the parent
                    # folder and each indiv sub-folder.
                    run_svn(["add", "--parents", path_offset])
                # TODO: Need to copy SVN properties from source repos

        elif action == 'D':
            run_svn(["remove", "--force", path_offset])

        elif action == 'M':
            # TODO: Is "svn merge -c" correct here? Should this just be an "svn export" plus
            #       proplist updating?
            run_svn(["merge", "-c", source_rev, "--non-recursive",
                     "--non-interactive", "--accept=theirs-full",
                     source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

        else:
            # Defensive only: the validation above already limits 'action'.
            # UnsupportedSVNAction is raised because SVNError is not among the
            # names imported in the visible module header.
            raise UnsupportedSVNAction("Internal Error: process_svn_log_entry: Unhandled 'action' value: '%s'"
                % action)

    # Export the final version of all add'd paths from source_url
    for path_offset in export_paths:
        run_svn(["export", "--force", "-r", source_rev,
                 source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

    return commit_paths
582
def disp_svn_log_summary(log_entry):
    """
    Print a short summary of the given log entry: a blank line, a
    "r<rev> | <author> | <date>" header, the commit message and a separator line.
    The date is shown in the local timezone.
    """
    ui.status("")
    commit_time = datetime.fromtimestamp(int(log_entry['date']))
    ui.status("r%s | %s | %s",
        log_entry['revision'],
        log_entry['author'],
        str(commit_time.isoformat(' ')))
    ui.status(log_entry['message'])
    ui.status("------------------------------------------------------------------------")
591
def pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url, target_url, rev_map, keep_author=False):
    """
    Pull SVN changes from the given log entry.
    Returns the new SVN revision, as returned by commit_from_svn_log_entry().

    'log_entry' is the source log entry to replay.
    'source_repos_url', 'source_url', 'target_url' and 'rev_map' are handed to
      process_svn_log_entry().
    'source_repos_uuid' is stored, together with 'source_url' and the source
      revision, in the source-tracking revprops of the new commit.
    'keep_author' is handed to the commit as its 'keep_author' option.

    No rollback is performed here; any exception propagates to the caller.
    """
    # Local import: only OptionParser/OptionGroup are imported at module level.
    from optparse import Values

    disp_svn_log_summary(log_entry)
    source_rev = log_entry['revision']

    # Process all the paths in this log entry
    commit_paths = process_svn_log_entry(log_entry, source_repos_url, source_url, target_url,
                                         rev_map, [])
    # If we had too many individual paths to commit, wipe the list and just commit at
    # the root of the working copy.
    if len(commit_paths) > 99:
        commit_paths = []

    target_revprops = gen_tracking_revprops(source_repos_uuid, source_url, source_rev)  # Build source-tracking revprop's
    # commit_from_svn_log_entry() takes (log_entry, options, commit_paths, target_revprops)
    # and reads options.keep_author, so wrap the flag in an options object
    # instead of passing an unsupported 'keep_author' keyword.
    options = Values({'keep_author': keep_author})
    return commit_from_svn_log_entry(log_entry, options, commit_paths,
                                     target_revprops=target_revprops)
613
def run_parser(parser):
    """
    Add common options to an OptionParser instance, and run parsing.

    Adds --version, a replacement -h/--help, -v/--verbose and --debug, then
    parses the command line. Prints the help or version text and exits with
    status 0 when requested. Returns the (options, args) pair.
    """
    parser.add_option("", "--version", dest="show_version", action="store_true",
        help="show version and exit")
    # Swap the built-in --help for one that only sets a flag, handled below.
    parser.remove_option("--help")
    parser.add_option("-h", "--help", dest="show_help", action="store_true",
        help="show this help message and exit")
    parser.add_option("-v", "--verbose", dest="verbosity", action="count", default=1,
        help="enable additional output (use -vv or -vvv for more)")
    parser.add_option("--debug", dest="verbosity", const=ui.DEBUG, action="store_const",
        help="enable debugging output (same as -vvv)")
    parsed = parser.parse_args()
    options, args = parsed
    if options.show_help:
        parser.print_help()
        sys.exit(0)
    if options.show_version:
        prog_name = os.path.basename(sys.argv[0])
        print("%s %s" % (prog_name, full_version))
        sys.exit(0)
    if options.verbosity < 10:
        # Expand multiple "-v" arguments to a real ui._level value
        options.verbosity = options.verbosity * 10
    ui.update_config(options)
    return options, args
640
def display_parser_error(parser, message):
    """
    Display an options error followed by the parser's help text, then
    terminate the process with exit status 1.
    """
    print("error: %s" % (message,))
    print("")
    parser.print_help()
    sys.exit(1)
649
def _find_first_source_log_entry(source_url, source_end_rev, chunk_size=1000):
    """
    Scan the history of source_url in windows of chunk_size revisions, from r1
    up to source_end_rev, and return the first log entry found.

    Returns None if no window in that range produced any log entries.
    """
    last_rev = int(source_end_rev)
    rev = 1
    while rev <= last_rev:
        chunk_end = min(rev + chunk_size - 1, last_rev)
        entries = svnclient.run_svn_log(source_url, rev, chunk_end, 1, get_changed_paths=False)
        if entries:
            return entries[0]
        # Nothing in this window; write some progress to the screen and move on
        ui.status("...%s...", rev)
        rev += chunk_size
    return None

def real_main(options, args):
    """
    Replay the history of a source SVN URL into a target SVN URL.

    options -- parsed command-line options; reads cont_from_break and svn_rev
               here, and is passed through to the per-revision helpers.
    args    -- list whose first two items are source_url and target_url; both
               are popped off the list.

    The target URL is checked out into ./_wc_target, which then becomes the
    current directory. In normal mode the contents of source_url at the start
    revision are exported, added and committed as the initial import; in
    continue mode the source->target revision map is rebuilt from the target
    repo instead. Every later source revision is then replayed and committed.

    Raises RuntimeError if no start revision can be found, or if continue mode
    is requested but the target has no replayed history. Errors during the
    replay loop itself are printed and the working copy is reverted.
    """
    source_url = args.pop(0).rstrip("/")
    target_url = args.pop(0).rstrip("/")
    ui.status("options: %s", str(options), level=ui.DEBUG, color='GREEN')

    # Make sure that both the source and target URL's are valid
    source_info = svnclient.get_svn_info(source_url)
    assert source_url.startswith(source_info['repos_url'])
    target_info = svnclient.get_svn_info(target_url)
    assert target_url.startswith(target_info['repos_url'])

    source_end_rev = source_info['revision']        # Get the last revision # for the source repo
    source_repos_url = source_info['repos_url']     # Get the base URL for the source repo, e.g. 'svn://svn.example.com/svn/repo'
    source_repos_uuid = source_info['repos_uuid']   # Get the UUID for the source repo

    wc_target = os.path.abspath('_wc_target')
    rev_map = {}
    commit_count = 0
    source_rev = None
    target_rev = None

    # Check out a working copy of target_url if needed
    wc_exists = os.path.exists(wc_target)
    if wc_exists and not options.cont_from_break:
        shutil.rmtree(wc_target)
        wc_exists = False
    if not wc_exists:
        svnclient.svn_checkout(target_url, wc_target)
    os.chdir(wc_target)

    if not options.cont_from_break:
        # Get log entry for the SVN revision we will check out
        if options.svn_rev:
            # If specify a rev, get log entry just before or at rev
            source_start_log = svnclient.get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
        else:
            # Otherwise, get log entry of branch creation.
            # Note: Trying to use svnclient.get_first_svn_log_entry(source_url, 1, source_end_rev, False)
            # ends-up being *VERY* time-consuming on a repo with lots of revisions. Even though
            # the "svn log" call is passing --limit 1, it seems like that limit-filter is happening
            # _after_ svn has fetched the full log history. Instead, search the history in chunks
            # and write some progress to the screen.
            # The search range is bounded by the *source* repo's last revision.
            ui.status("Searching for start source revision (%s)...", source_url, level=ui.VERBOSE)
            source_start_log = _find_first_source_log_entry(source_url, source_end_rev)
        if not source_start_log:
            raise RuntimeError("Unable to find first revision for source_url: %s" % source_url)

        # This is the revision we will start from for source_url
        source_start_rev = source_rev = int(source_start_log['revision'])
        ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        paths = run_svn(["list", "-r", source_rev, source_url+"@"+str(source_rev)])
        if len(paths)>1:
            disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_url, source_rev, source_rev))
            ui.status("(Initial import)", level=ui.VERBOSE)
            for path_raw in paths.strip("\n").split("\n"):
                # For each top-level file/folder...
                if not path_raw:
                    continue
                # Directories have a trailing slash in the "svn list" output
                path_is_dir = path_raw.endswith("/")
                path = path_raw.rstrip('/')
                if path_is_dir and not os.path.exists(path):
                    os.makedirs(path)
                ui.status(" A %s", source_url[len(source_repos_url):]+"/"+path, level=ui.VERBOSE)
                run_svn(["export", "--force", "-r" , source_rev, source_url+"/"+path+"@"+str(source_rev), path])
                run_svn(["add", path])
            target_revprops = gen_tracking_revprops(source_repos_uuid, source_url, source_rev) # Build source-tracking revprop's
            target_rev = commit_from_svn_log_entry(source_start_log, options, target_revprops=target_revprops)
            if target_rev:
                # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
                set_rev_map(rev_map, source_rev, target_rev)
                # Update our target working-copy, to ensure everything says it's at the new HEAD revision
                run_svn(["update"])
                commit_count += 1
    else:
        # Re-build the rev_map based on any already-replayed history in target_url
        rev_map = build_rev_map(target_url, source_info)
        if not rev_map:
            raise RuntimeError("Called with continue-mode, but no already-replayed history found in target repo: %s" % target_url)
        source_start_rev = int(max(rev_map, key=rev_map.get))
        assert source_start_rev
        ui.status("Continuing from source revision %s.", source_start_rev, level=ui.VERBOSE)

    # Compare (major, minor) as a tuple of ints: a float comparison would read
    # "1.10" as 1.1 and wrongly treat SVN 1.10+ as older than 1.7.
    svn_vers_t = svnclient.get_svn_client_version()
    svn_is_17_plus = tuple(int(part) for part in svn_vers_t[0:2]) >= (1, 7)

    # Load SVN log starting from source_start_rev + 1
    it_log_entries = svnclient.iter_svn_log_entries(source_url, source_start_rev+1, source_end_rev, get_revprops=True)
    source_rev = None

    try:
        for log_entry in it_log_entries:
            # Replay this revision from source_url into target_url
            disp_svn_log_summary(log_entry)
            source_rev = log_entry['revision']
            # Process all the changed-paths in this log entry
            commit_paths = []
            process_svn_log_entry(log_entry, source_repos_url, source_url, target_url,
                                  rev_map, options, commit_paths)
            # Commit any changes made to _wc_target
            target_revprops = gen_tracking_revprops(source_repos_uuid, source_url, source_rev) # Build source-tracking revprop's
            target_rev = commit_from_svn_log_entry(log_entry, options, commit_paths, target_revprops=target_revprops)
            if target_rev:
                # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
                set_rev_map(rev_map, source_rev, target_rev)
                # Update our target working-copy, to ensure everything says it's at the new HEAD revision
                run_svn(["update"])
                commit_count += 1
                # Run "svn cleanup" every 100 commits if SVN 1.7+, to clean-up orphaned ".svn/pristines/*"
                if svn_is_17_plus and (commit_count % 100 == 0):
                    run_svn(["cleanup"])
        if not source_rev:
            # If there were no new source_url revisions to process, init source_rev
            # for the "finally" message below.
            source_rev = source_end_rev

    except KeyboardInterrupt:
        print("\nStopped by user.")
        print("\nCleaning-up...")
        run_svn(["cleanup"])
        full_svn_revert()
    except:
        # Deliberately broad: whatever went wrong, report it and leave the
        # working copy in a clean state.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        print("\nCleaning-up...")
        run_svn(["cleanup"])
        print(run_svn(["status"]))
        full_svn_revert()
    finally:
        print("\nFinished at source revision %s." % source_rev)
799
def main():
    """
    Command-line entry point: build the option parser, parse the command
    line via run_parser(), require exactly two positional arguments
    (source_url and target_url), and hand off to real_main().

    Returns whatever real_main() returns.
    """
    # Defined as entry point. Must be callable without arguments.
    usage_text = """Usage: %prog [OPTIONS] source_url target_url

Replicate (replay) history from one SVN repository to another. Maintain
logical ancestry wherever possible, so that 'svn log' on the replayed
repo will correctly follow file/folder renames.

== Examples ==
Create a copy of only /trunk from source repo, starting at r5000
$ svnadmin create /svn/target
$ svn mkdir -m 'Add trunk' file:///svn/target/trunk
$ svn2svn -av -r 5000 http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target
2. The first commit to http://server/source/trunk at/after r5000 will be
exported & added into _wc_target
3. All revisions affecting http://server/source/trunk (starting at r5000)
will be replayed to _wc_target. Any add/copy/move/replaces that are
copy-from'd some path outside of /trunk (e.g. files renamed on a /branch
and branch was merged into /trunk) will correctly maintain logical
ancestry where possible.

Use continue-mode (-c) to pick-up where the last run left-off
$ svn2svn -avc http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target, if not already
checked-out
2. All new revisions affecting http://server/source/trunk starting from
the last replayed revision to file:///svn/target/trunk (based on the
svn2svn:* revprops) will be replayed to _wc_target, maintaining all
logical ancestry where possible."""
    opt_parser = OptionParser(usage=usage_text)
    opt_parser.add_option("-r", "--revision", dest="svn_rev", type="int", metavar="REV",
                          help="initial SVN revision to start source_url replay")
    opt_parser.add_option("-a", "--keep-author", dest="keep_author", action="store_true",
                          help="maintain original Author info from source repo")
    opt_parser.add_option("-c", "--continue", dest="cont_from_break", action="store_true",
                          help="continue from previous break")
    options, args = run_parser(opt_parser)
    # Exactly two positionals are expected: source_url and target_url
    if len(args) != 2:
        display_parser_error(opt_parser, "incorrect number of arguments")
    return real_main(options, args)
841
842
# Script entry: run main() and use its result as the process exit status,
# treating a falsy result (e.g. None) as success.
if __name__ == "__main__":
    exit_status = main() or 0
    sys.exit(exit_status)