]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/run/svn2svn.py
* Carry-forward revprop values from source repo
[svn2svn.git] / svn2svn / run / svn2svn.py
1 """
2 Replicate (replay) changesets from one SVN repository to another:
3 * Maintains full logical history (e.g. uses "svn copy" for renames).
4 * Maintains original commit messages.
5 * Optionally maintain source author info. (Only supported if accessing
6 target SVN repo via file://)
7 * Cannot maintain original commit date, but appends original commit date
8 for each commit message: "Date: %d".
9 * Optionally run an external shell script before each replayed commit
10 to give the ability to dynamically exclude or modify files as part
11 of the replay.
12
13 License: GPLv3, same as hgsvn (https://bitbucket.org/andialbrecht/hgsvn)
14 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
15 (Inspired by http://code.google.com/p/svn2svn/, and uses code for hgsvn
16 for SVN client handling)
17 """
18
19 from .. import base_version, full_version
20 from .. import ui
21 from .. import svnclient
22 from ..shell import run_svn
23 from ..errors import (ExternalCommandFailed, UnsupportedSVNAction)
24
25 import sys
26 import os
27 import time
28 import traceback
29 import shutil
30 import operator
31 from optparse import OptionParser,OptionGroup
32 from datetime import datetime
33
def commit_from_svn_log_entry(log_entry, files=None, keep_author=False, target_revprops=None):
    """
    Given an SVN log entry and an optional sequence of files, do an svn commit.

    'log_entry' is a dict providing 'date' (epoch seconds), 'message', 'author'
      and 'revprops' (a sequence of {'name': ..., 'value': ...} dicts, or an
      empty value).
    'files' is an optional sequence of working-copy paths to pass to "svn commit".
      When empty, no explicit paths are passed.
    'keep_author' passes the original author via "--username" instead of
      appending an "Author:" line to the commit message.
    'target_revprops' is an optional sequence of {'name': ..., 'value': ...} dicts
      to set on the new revision. These override same-named source revprops.

    Returns the committed revision number (as a string) parsed from the svn
    output, "" if svn printed output without a "Committed revision" line, or
    None if svn printed nothing.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # This will use the local timezone for displaying commit times
    timestamp = int(log_entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    # Uncomment this one if you prefer UTC commit times
    #svn_date = "%d 0" % timestamp
    message = log_entry['message'] + "\nDate: " + svn_date
    if keep_author:
        options = ["commit", "--force-log", "-m", message, "--username", log_entry['author']]
    else:
        options = ["commit", "--force-log", "-m", message + "\nAuthor: " + log_entry['author']]
    revprops = {}
    # Carry forward any revprop's from the source revision
    for v in (log_entry['revprops'] or []):
        revprops[v['name']] = v['value']
    # Add any extra revprop's we want to set for the target repo commits.
    # (None rather than a shared [] default, so no list is shared across calls.)
    for v in (target_revprops or []):
        revprops[v['name']] = v['value']
    for key in revprops:
        options += ["--with-revprop", "%s=%s" % (key, str(revprops[key]))]
    if files:
        options += list(files)
    output = run_svn(options)
    rev = None
    if output:
        rev = ""
        marker = 'Committed revision '
        for line in output.strip("\n").split("\n"):
            if line.startswith(marker):
                rev = line[len(marker):].rstrip('.')
                break
        if rev:
            ui.status("Committed revision %s.", rev)
    return rev
74
def full_svn_revert():
    """
    Do an "svn revert" and proactively remove any extra files in the working copy.

    Runs "svn revert --recursive ." in the current directory, then deletes every
    path that "svn status" reports as unversioned ("?").
    """
    run_svn(["revert", "--recursive", "."])
    output = run_svn(["status"])
    if not output:
        return
    for line in output.strip("\n").split("\n"):
        # startswith() instead of line[0]: a blank line in the status output
        # would otherwise raise IndexError.
        if not line.startswith("?"):
            continue
        path = line[4:].strip(" ")
        if os.path.isfile(path):
            os.remove(path)
        elif os.path.isdir(path):
            shutil.rmtree(path)
90
def gen_tracking_revprops(source_repos_uuid, source_url, source_rev):
    """
    Return the svn2svn source-tracking revprops as a list of
    {'name': ..., 'value': ...} dicts, in the order uuid, url, rev.
    """
    tracking = (
        ('svn2svn:source_uuid', source_repos_uuid),
        ('svn2svn:source_url', source_url),
        ('svn2svn:source_rev', source_rev),
    )
    return [{'name': prop_name, 'value': prop_value} for prop_name, prop_value in tracking]
99
def in_svn(p, require_in_repo=False, prefix=""):
    """
    Report whether the given file/folder is tracked by Subversion.

    The answer comes from the first "svn status" entry for the path rather than
    from looking for ".svn" directories, since SVN 1.7+ (WC-NG) keeps only a
    single top-level ".svn" at the root of the working copy.

    'require_in_repo' makes locally-added paths (status 'added', or no revision)
    count as not tracked. 'prefix' is prepended to the debug status message.
    """
    entries = svnclient.get_svn_status(p, no_recursive=True)
    if not entries:
        return False
    info = entries[0]
    status = info['status']
    if require_in_repo and (status == 'added' or info['revision'] is None):
        # Caller needs the path to exist in the repository itself, so a path
        # that is only locally added does not qualify.
        tracked = False
    elif status == 'deleted':
        # Paths scheduled for deletion in the WC are not under source-control.
        tracked = False
    else:
        # Locally added/copied paths do count as under source-control.
        tracked = (info['type'] == 'normal' or status == 'added' or info['copied'] == 'true')
    ui.status(prefix + ">> in_svn('%s', require_in_repo=%s) --> %s", p, str(require_in_repo), str(tracked), level=ui.DEBUG, color='GREEN')
    return tracked
121
def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix = ""):
    """
    Given a source path, walk the SVN history backwards to inspect the ancestry of
    that path, seeing if it traces back to base_path. Build an array of copyfrom_path
    and copyfrom_revision pairs for each of the "svn copies". If we find a copyfrom_path
    which base_path is a substring match of (e.g. we crawled back to the initial branch-
    copy from trunk), then return the collection of ancestor paths. Otherwise,
    copyfrom_path has no ancestry compared to base_path.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'svn_repos_url' is the full URL to the root of the SVN repository,
      e.g. 'file:///path/to/repo'
    'base_path' is the path in the SVN repo to the target path we're trying to
      trace ancestry back to, e.g. 'trunk'.
    'source_path' is the path in the SVN repo to the source path to start checking
      ancestry at, e.g. 'branches/fix1/projectA/file1.txt'.
      (full_path = svn_repos_url+base_path+"/"+path_offset)
    'source_rev' is the revision to start walking the history of source_path backwards from.
    'prefix' is prepended to every debug status message.

    Returns a list of {'path': ..., 'revision': ...} dicts. The first entry is
    base_path+"/"+source_path at source_rev and each following entry is the path and
    revision reached by following one more copy-from. Returns an empty list when no
    copy-from hops were collected.

    Raises UnsupportedSVNAction if a changed-path action is not found in 'MARD'.
    """
    ui.status(prefix + ">> find_svn_ancestors: Start: (%s) source_path: %s base_path: %s",
        svn_repos_url, source_path+"@"+str(source_rev), base_path, level=ui.DEBUG, color='YELLOW')
    done = False
    working_path = base_path+"/"+source_path
    working_rev = source_rev
    first_iter_done = False
    # Copy-from hops collected so far, newest first.
    ancestors_temp = []
    while not done:
        # Get the first "svn log" entry for this path (relative to @rev)
        ui.status(prefix + ">> find_svn_ancestors: %s", svn_repos_url + working_path+"@"+str(working_rev), level=ui.DEBUG, color='YELLOW')
        log_entry = svnclient.get_first_svn_log_entry(svn_repos_url + working_path+"@"+str(working_rev), 1, working_rev, True)
        if not log_entry:
            # NOTE(review): ancestors_temp is not cleared on this exit, so any hops
            # collected so far are still returned.
            ui.status(prefix + ">> find_svn_ancestors: Done: no log_entry", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        # If we found a copy-from case which matches our base_path, we're done.
        # ...but only if we've at least tried to search for the first copy-from path.
        if first_iter_done and working_path.startswith(base_path):
            ui.status(prefix + ">> find_svn_ancestors: Done: Found working_path.startswith(base_path) and first_iter_done=True", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        first_iter_done = True
        # Search for any actions on our target path (or parent paths).
        changed_paths_temp = []
        for d in log_entry['changed_paths']:
            path = d['path']
            # NOTE(review): this is a substring test, not a path-component test.
            if path in working_path:
                changed_paths_temp.append({'path': path, 'data': d})
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the chain and this path has no ancestry back to base_path.
            # NOTE(review): ancestors_temp is not cleared on this exit either.
            ui.status(prefix + ">> find_svn_ancestors: Done: No matching changed_paths", level=ui.DEBUG, color='YELLOW')
            done = True
            continue
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
        # Find the action for our working_path in this revision. Use a loop to check in reverse order,
        # so that if the target file/folder is "M" but has a parent folder with an "A" copy-from,
        # the loop falls through the "M" entry and reaches the parent's entry.
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            action = d['action']
            if action not in 'MARD':
                raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                    % (log_entry['revision'], action))
            ui.status(prefix + "> %s %s%s", action, path,
                (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                level=ui.DEBUG, color='YELLOW')
            if action == 'D':
                # If file/folder was deleted, it has no ancestor
                ancestors_temp = []
                ui.status(prefix + ">> find_svn_ancestors: Done: deleted", level=ui.DEBUG, color='YELLOW')
                done = True
                break
            if action in 'RA':
                # If file/folder was added/replaced but not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    ancestors_temp = []
                    ui.status(prefix + ">> find_svn_ancestors: Done: %s with no copyfrom_path",
                        "Added" if action == "A" else "Replaced",
                        level=ui.DEBUG, color='YELLOW')
                    done = True
                    break
                # Else, file/folder was added/replaced and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                ui.status(prefix + ">> find_svn_ancestors: Found copy-from (action=%s): %s --> %s",
                    action, path, d['copyfrom_path']+"@"+str(d['copyfrom_revision']),
                    level=ui.DEBUG, color='YELLOW')
                ancestors_temp.append({'path': path, 'revision': log_entry['revision'],
                    'copyfrom_path': d['copyfrom_path'], 'copyfrom_rev': d['copyfrom_revision']})
                # Rewrite the working path/rev to the copy source for the next pass of the while-loop.
                working_path = working_path.replace(d['path'], d['copyfrom_path'])
                working_rev = d['copyfrom_revision']
                # Follow the copy and keep on searching
                break
    ancestors = []
    if ancestors_temp:
        # Start the chain at the path we were asked about, then replay each collected hop
        # to turn the (path -> copyfrom_path) pairs into full ancestor paths.
        ancestors.append({'path': base_path+"/"+source_path, 'revision': source_rev})
        working_path = base_path+"/"+source_path
        for idx in range(len(ancestors_temp)):
            d = ancestors_temp[idx]
            working_path = working_path.replace(d['path'], d['copyfrom_path'])
            working_rev = d['copyfrom_rev']
            ancestors.append({'path': working_path, 'revision': working_rev})
        # max_len is only used to column-align the debug listing below.
        max_len = 0
        for idx in range(len(ancestors)):
            d = ancestors[idx]
            max_len = max(max_len, len(d['path']+"@"+str(d['revision'])))
        ui.status(prefix + ">> find_svn_ancestors: Found parent ancestors:", level=ui.DEBUG, color='YELLOW_B')
        for idx in range(len(ancestors)-1):
            d = ancestors[idx]
            d_next = ancestors[idx+1]
            ui.status(prefix + " [%s] %s <-- %s", idx,
                str(d['path']+"@"+str(d['revision'])).ljust(max_len),
                str(d_next['path']+"@"+str(d_next['revision'])).ljust(max_len),
                level=ui.DEBUG, color='YELLOW')
    else:
        ui.status(prefix + ">> find_svn_ancestors: No ancestor-chain found: %s",
            svn_repos_url+base_path+"/"+source_path+"@"+str(source_rev), level=ui.DEBUG, color='YELLOW')
    return ancestors
242
def get_rev_map(rev_map, source_rev, prefix):
    """
    Look up the target-repo revision equivalent to a source-repo revision.

    Walks downwards from source_rev to 1 and returns (as an int) the mapped
    value of the first revision found in rev_map, i.e. the highest entry
    less-than-or-equal-to source_rev. Returns None if no such entry exists.
    """
    ui.status(prefix + ">> get_rev_map(%s)", source_rev, level=ui.DEBUG, color='GREEN')
    candidate = int(source_rev)
    while candidate > 0:
        known = candidate in rev_map
        ui.status(prefix + ">> get_rev_map: rev=%s in_rev_map=%s", candidate, str(known), level=ui.DEBUG, color='BLACK_B')
        if known:
            return int(rev_map[candidate])
        candidate -= 1
    # Fell off the bottom of the rev_map without finding a match.
    return None
255
def set_rev_map(rev_map, source_rev, target_rev):
    """
    Record in rev_map that source_rev (source repo) corresponds to target_rev
    (target repo). Both are stored as ints.
    """
    ui.status(">> set_rev_map: source_rev=%s target_rev=%s", source_rev, target_rev, level=ui.DEBUG, color='GREEN')
    mapped_rev = int(target_rev)
    rev_map[int(source_rev)] = mapped_rev
259
def build_rev_map(target_url, source_info):
    """
    Check for any already-replayed history from source_url (source_info) and
    build the mapping-table of source_rev -> target_rev.

    'target_url' is the URL whose log (revision 1 through HEAD) is scanned.
    'source_info' is a dict providing 'repos_uuid' and 'url' for the source.

    Only target revisions carrying all three svn2svn tracking revprops, with a
    uuid and url matching source_info, contribute to the map.

    Returns a dict mapping int source revision -> int target revision.
    """
    rev_map = {}
    ui.status("Rebuilding rev_map...", level=ui.VERBOSE)
    required = ('svn2svn:source_uuid', 'svn2svn:source_url', 'svn2svn:source_rev')
    it_log_entries = svnclient.iter_svn_log_entries(target_url, 1, 'HEAD', get_changed_paths=False, get_revprops=True)
    for log_entry in it_log_entries:
        if not log_entry['revprops']:
            continue
        revprops = {}
        for v in log_entry['revprops']:
            if v['name'].startswith('svn2svn:'):
                revprops[v['name']] = v['value']
        # A revision can carry revprops without the svn2svn tracking ones.
        # Skip it instead of raising KeyError on the lookups below.
        if not all(name in revprops for name in required):
            continue
        if revprops['svn2svn:source_uuid'] == source_info['repos_uuid'] and \
           revprops['svn2svn:source_url'] == source_info['url']:
            source_rev = revprops['svn2svn:source_rev']
            target_rev = log_entry['revision']
            set_rev_map(rev_map, source_rev, target_rev)
    return rev_map
281
def get_svn_dirlist(svn_path, svn_rev = ""):
    """
    Return the entries that "svn list" prints for the given folder path, one
    list item per output line. When svn_rev is given, it is used both as the
    "-r" revision and as the peg revision. Returns [] when the command output
    is no longer than one character.
    """
    target = svn_path
    cmd = ["list"]
    if svn_rev:
        cmd.extend(["-r", svn_rev])
        target = target + "@" + str(svn_rev)
    cmd.append(target)
    listing = run_svn(cmd, no_fail=True)
    if len(listing) > 1:
        return listing.strip("\n").split("\n")
    return []
295
296 def _add_export_path(export_paths, path_offset):
297 found = False
298 for p in export_paths:
299 if path_offset.startswith(p):
300 found = True
301 break
302 if not found:
303 export_paths.append(path_offset)
304 return export_paths
305
def do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev, \
               parent_copyfrom_path="", parent_copyfrom_rev="", export_paths=None, \
               rev_map=None, is_dir = False, prefix = ""):
    """
    Given the add'd source path, replay the "svn add/copy" commands to correctly
    track renames across copy-from's.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2 " Rename folder
    4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt " Rename file inside renamed folder
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'path_offset' is the offset from source_base to the file to check ancestry for,
      e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'target_url' is the full URL to the target path in the target repository.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'parent_copyfrom_path' and 'parent_copyfrom_rev' is the copy-from path of the parent
      directory, when being called recursively by do_svn_add_dir().
    'export_paths' is the list of path_offset's that we've deferred running "svn export" on.
      It is appended to in place. A fresh list is used when omitted.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s. An empty map is used when omitted.
    'is_dir' is whether path_offset is a directory (rather than a file).
    'prefix' is prepended to every debug status message.
    """
    # None defaults instead of shared mutable ones. The previous export_paths
    # default was a dict, which cannot be appended to.
    if export_paths is None:
        export_paths = []
    if rev_map is None:
        rev_map = {}
    source_base = source_url[len(source_repos_url):]
    ui.status(prefix + ">> do_svn_add: %s %s", source_base+"/"+path_offset+"@"+str(source_rev),
        " (parent-copyfrom: "+parent_copyfrom_path+"@"+str(parent_copyfrom_rev)+")" if parent_copyfrom_path else "",
        level=ui.DEBUG, color='GREEN')
    # Check if the given path has ancestors which chain back to the current source_base
    found_ancestor = False
    tgt_rev = None
    ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset, source_rev, prefix+" ")
    # ancestors[n] is the original (pre-branch-copy) trunk path.
    # ancestors[n-1] is the first commit on the new branch.
    copyfrom_path = ancestors[len(ancestors)-1]['path'] if ancestors else ""
    copyfrom_rev = ancestors[len(ancestors)-1]['revision'] if ancestors else ""
    if ancestors:
        # The copy-from path has ancestry back to source_url.
        ui.status(prefix + ">> do_svn_add: Check copy-from: Found parent: %s", copyfrom_path+"@"+str(copyfrom_rev),
            level=ui.DEBUG, color='GREEN', bold=True)
        found_ancestor = True
        # Map the copyfrom_rev (source repo) to the equivalent target repo rev #. This can
        # return None in the case where copyfrom_rev is *before* our source_start_rev.
        tgt_rev = get_rev_map(rev_map, copyfrom_rev, prefix+" ")
        ui.status(prefix + ">> do_svn_add: get_rev_map: %s (source) -> %s (target)", copyfrom_rev, tgt_rev, level=ui.DEBUG, color='GREEN')
    else:
        ui.status(prefix + ">> do_svn_add: Check copy-from: No ancestor chain found.", level=ui.DEBUG, color='GREEN')
        found_ancestor = False
    if found_ancestor and tgt_rev:
        # Check if this path_offset in the target WC already has this ancestry, in which
        # case there's no need to run the "svn copy" (again).
        path_in_svn = in_svn(path_offset, prefix=prefix+" ")
        log_entry = svnclient.get_last_svn_log_entry(path_offset, 1, 'HEAD', get_changed_paths=False) if in_svn(path_offset, require_in_repo=True, prefix=prefix+" ") else []
        if (not log_entry or (log_entry['revision'] != tgt_rev)):
            copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
            ui.status(prefix + ">> do_svn_add: svn_copy: Copy-from: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " copyfrom: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " p_copyfrom: %s", parent_copyfrom_path+"@"+str(parent_copyfrom_rev) if parent_copyfrom_path else "", level=ui.DEBUG, color='GREEN')
            if path_in_svn and \
               ((parent_copyfrom_path and copyfrom_path.startswith(parent_copyfrom_path)) and \
                (parent_copyfrom_rev and copyfrom_rev == parent_copyfrom_rev)):
                # When being called recursively, if this child entry has the same ancestor as
                # the parent, then no need to try to run another "svn copy".
                ui.status(prefix + ">> do_svn_add: svn_copy: Same ancestry as parent: %s",
                    parent_copyfrom_path+"@"+str(parent_copyfrom_rev),level=ui.DEBUG, color='GREEN')
            else:
                # Copy this path from the equivalent path+rev in the target repo, to create the
                # equivalent history.
                if parent_copyfrom_path:
                    # If we have a parent copy-from path, we mis-match that so display a status
                    # message describing the action we're mimic'ing. If path_in_svn, then this
                    # is logically a "replace" rather than an "add".
                    ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), source_base+"/"+path_offset, ancestors[1]['path']+"@"+str(copyfrom_rev), level=ui.VERBOSE)
                if path_in_svn:
                    # If local file is already under version-control, then this is a replace.
                    ui.status(prefix + ">> do_svn_add: pre-copy: local path already exists: %s", path_offset, level=ui.DEBUG, color='GREEN')
                    run_svn(["remove", "--force", path_offset])
                run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset])
                # Export the final version of this file/folder from the source repo, to make
                # sure we're up-to-date.
                export_paths = _add_export_path(export_paths, path_offset)
        else:
            ui.status(prefix + ">> do_svn_add: Skipped 'svn copy': %s", path_offset, level=ui.DEBUG, color='GREEN')
    else:
        # Else, either this copy-from path has no ancestry back to source_url OR copyfrom_rev comes
        # before our initial source_start_rev (i.e. tgt_rev == None), so can't do a "svn copy".
        # Create (parent) directory if needed.
        # TODO: This is (nearly) a duplicate of code in process_svn_log_entry(). Should this be
        # split-out to a shared tag?
        p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or '.'
        if not os.path.exists(p_path):
            run_svn(["mkdir", p_path])
        if not in_svn(path_offset, prefix=prefix+" "):
            if is_dir:
                # Export the final version of all files in this folder.
                export_paths = _add_export_path(export_paths, path_offset)
            else:
                # Export the final version of this file. We *need* to do this before running
                # the "svn add", even if we end-up re-exporting this file again via export_paths.
                run_svn(["export", "--force", "-r", source_rev,
                         source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])
            # If not already under version-control, then "svn add" this file/folder.
            run_svn(["add", "--parents", path_offset])
        # TODO: Need to copy SVN properties from source repos
    if is_dir:
        # For any folders that we process, process any child contents, so that we correctly
        # replay copies/replaces/etc.
        do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url,
                       copyfrom_path, copyfrom_rev, export_paths, rev_map, prefix+" ")
423
def do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url, \
                   parent_copyfrom_path, parent_copyfrom_rev, export_paths, rev_map, prefix=""):
    """
    Bring the children of the folder path_offset in line with the source repo.

    Every entry listed for the source folder at source_rev is passed to
    do_svn_add(). Every entry listed for the local folder that is missing from
    the source listing is "svn remove"d.
    """
    source_base = source_url[len(source_repos_url):]
    # Directory listings for the local WC (target_url) and the remote repo (source_url).
    # TODO: the local listing won't include add'd paths because "svn ls" lists the
    # contents of the associated remote repo folder. (Is this a problem?)
    local_entries = get_svn_dirlist(path_offset)
    remote_entries = get_svn_dirlist(source_url+"/"+path_offset, source_rev)
    ui.status(prefix + ">> do_svn_add_dir: paths_local: %s", str(local_entries), level=ui.DEBUG, color='GREEN')
    ui.status(prefix + ">> do_svn_add_dir: paths_remote: %s", str(remote_entries), level=ui.DEBUG, color='GREEN')
    # Replay everything the remote folder contains.
    for entry in remote_entries:
        entry_is_dir = (entry[-1] == "/")
        if entry_is_dir:
            child_name = entry.rstrip('/')
        else:
            child_name = entry
        do_svn_add(source_repos_url, source_url, path_offset+"/"+child_name, target_url, source_rev,
                   parent_copyfrom_path, parent_copyfrom_rev, export_paths,
                   rev_map, entry_is_dir, prefix+" ")
    # Drop anything that exists locally but no longer exists remotely.
    for entry in local_entries:
        if entry in remote_entries:
            continue
        ui.status(" %s %s", 'D', source_base+"/"+path_offset+"/"+entry, level=ui.VERBOSE)
        run_svn(["remove", "--force", path_offset+"/"+entry])
    # TODO: Does this handle deleted folders too? Wouldn't want to have a case
    # where we only delete all files from folder but leave orphaned folder around.
448
def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url, \
                          rev_map, commit_paths = None, prefix = ""):
    """
    Process SVN changes from the given log entry.
    Returns array of all the paths in the working-copy that were changed,
    i.e. the paths which need to be "svn commit".

    'log_entry' is the array structure built by parse_svn_log_xml().
    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s.
    'commit_paths' is the working list of specific paths which changes to pass
      to the final "svn commit". A caller-supplied list is appended to in
      place. A fresh list is used when omitted.
    'prefix' is prepended to every debug status message.

    Raises UnsupportedSVNAction for a changed-path action this function does
    not handle.
    """
    # None default rather than a shared [] that would accumulate paths across calls.
    if commit_paths is None:
        commit_paths = []
    removed_paths = []
    export_paths = []
    # Get the relative offset of source_url based on source_repos_url
    # e.g. '/branches/bug123'
    source_base = source_url[len(source_repos_url):]
    source_rev = log_entry['revision']
    ui.status(prefix + ">> process_svn_log_entry: %s", source_url+"@"+str(source_rev), level=ui.DEBUG, color='GREEN')
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not path.startswith(source_base + "/"):
            # Ignore changed files that are not part of this subdir
            if path != source_base:
                ui.status(prefix + ">> process_svn_log_entry: Unrelated path: %s (base: %s)", path, source_base, level=ui.DEBUG, color='GREEN')
            continue
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path
        action = d['action']
        if action not in 'MARD':
            raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                % (source_rev, action))
        if action not in 'D':
            # (Note: Skip displaying action message for 'D' here since we'll display that
            # message when we process the deferred delete actions at the end.)
            ui.status(" %s %s%s", action, d['path'],
                (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                level=ui.VERBOSE)

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        # The list is capped at 100 entries here.
        if len (commit_paths) < 100:
            commit_paths.append(path_offset)

        # Special-handling for replace's
        if action == 'R':
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            # If we have any queued deletions for this same path, remove those if we're re-adding this path.
            if path_offset in removed_paths:
                removed_paths.remove(path_offset)
            path_is_dir = True if d['kind'] == 'dir' else False
            # Handle cases where this "add" was a copy from another URL in the source repos
            if d['copyfrom_revision']:
                do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev,
                           "", "", export_paths, rev_map, path_is_dir, prefix+" ")
            # Else just "svn export" the files from the source repo and "svn add" them.
            else:
                # Create (parent) directory if needed
                p_path = path_offset if path_is_dir else os.path.dirname(path_offset).strip() or '.'
                if not os.path.exists(p_path):
                    run_svn(["mkdir", p_path])
                # Export the entire added tree.
                if path_is_dir:
                    export_paths = _add_export_path(export_paths, path_offset)
                else:
                    # Export the final version of this file. We *need* to do this before running
                    # the "svn add", even if we end-up re-exporting this file again via export_paths.
                    run_svn(["export", "--force", "-r", source_rev,
                             source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])
                if not in_svn(path_offset, prefix=prefix+" "):
                    # Need to use in_svn here to handle cases where client committed the parent
                    # folder and each indiv sub-folder.
                    run_svn(["add", "--parents", path_offset])
                # TODO: Need to copy SVN properties from source repos

        elif action == 'D':
            # Queue "svn remove" commands, to allow the action == 'A' handling the opportunity
            # to do smart "svn copy" handling on copy/move/renames.
            if not path_offset in removed_paths:
                removed_paths.append(path_offset)

        elif action == 'M':
            # TODO: Is "svn merge -c" correct here? Should this just be an "svn export" plus
            # proplist updating?
            run_svn(["merge", "-c", source_rev, "--non-recursive",
                     "--non-interactive", "--accept=theirs-full",
                     source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

        else:
            # The visible imports do not provide SVNError, so raising it would
            # only produce a NameError. Use the imported action-error type.
            raise UnsupportedSVNAction("Internal Error: process_svn_log_entry: Unhandled 'action' value: '%s'"
                % action)

    # Process any deferred removed actions
    if removed_paths:
        path_base = source_url[len(source_repos_url):]
        for path_offset in removed_paths:
            ui.status(" %s %s", 'D', path_base+"/"+path_offset, level=ui.VERBOSE)
            run_svn(["remove", "--force", path_offset])
    # Export the final version of all add'd paths from source_url
    if export_paths:
        for path_offset in export_paths:
            run_svn(["export", "--force", "-r", source_rev,
                     source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])

    return commit_paths
579
def disp_svn_log_summary(log_entry):
    """
    Print a short summary of a log entry: a blank line, a
    "r<rev> | <author> | <local date>" header, the commit message and a
    separator rule.
    """
    separator = "------------------------------------------------------------------------"
    ui.status("")
    revision = log_entry['revision']
    author = log_entry['author']
    committed_at = datetime.fromtimestamp(int(log_entry['date']))
    ui.status("r%s | %s | %s", revision, author, str(committed_at.isoformat(' ')))
    ui.status(log_entry['message'])
    ui.status(separator)
588
def pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url, target_url, rev_map, keep_author=False):
    """
    Replay the changes of one source log entry into the working copy and
    commit them, tagging the commit with the source-tracking revprops.
    Returns whatever commit_from_svn_log_entry() returns for the new commit.
    """
    disp_svn_log_summary(log_entry)
    source_rev = log_entry['revision']

    # Replay every changed path. The callee fills this list in place.
    changed_paths = []
    process_svn_log_entry(log_entry, source_repos_url, source_url, target_url,
                          rev_map, changed_paths)
    # With too many individual paths, commit at the root of the working copy
    # instead (an empty list means "no explicit paths").
    if len(changed_paths) > 99:
        paths_to_commit = []
    else:
        paths_to_commit = changed_paths

    # Build source-tracking revprop's
    tracking = gen_tracking_revprops(source_repos_uuid, source_url, source_rev)
    return commit_from_svn_log_entry(log_entry, paths_to_commit,
                                     keep_author=keep_author, target_revprops=tracking)
610
def run_parser(parser):
    """
    Add common options to an OptionParser instance, and run parsing.

    Adds --version, a replacement -h/--help, -v/--verbose and --debug. Prints
    the help or version and exits with status 0 when requested. Otherwise
    passes the parsed options to ui.update_config() and returns
    (options, args).
    """
    parser.add_option("", "--version", dest="show_version", action="store_true",
        help="show version and exit")
    # Swap the built-in --help for one that is handled explicitly below.
    parser.remove_option("--help")
    parser.add_option("-h", "--help", dest="show_help", action="store_true",
        help="show this help message and exit")
    parser.add_option("-v", "--verbose", dest="verbosity", const=ui.VERBOSE,
        default=10, action="store_const",
        help="enable additional output")
    parser.add_option("--debug", dest="verbosity", const=ui.DEBUG,
        action="store_const",
        help="enable debugging output")
    options, args = parser.parse_args()
    if options.show_help:
        parser.print_help()
        sys.exit(0)
    if options.show_version:
        prog_name = os.path.basename(sys.argv[0])
        # Single-argument parenthesized print: same output as the old print
        # statement, and also valid syntax on Python 3.
        print("%s %s" % (prog_name, full_version))
        sys.exit(0)
    ui.update_config(options)
    return options, args
636
def display_parser_error(parser, message):
    """
    Display an options error, and terminate.

    Prints "error: <message>", a blank line and the parser's help text, then
    exits with status 1.
    """
    # Single-argument parenthesized print produces the same output as the old
    # print statements and is also valid syntax on Python 3.
    print("error: %s" % (message,))
    print("")
    parser.print_help()
    sys.exit(1)
645
def real_main(options, args):
    """
    Replay the history of a source SVN URL into a target SVN URL.

    Pops the source and target URLs off the front of ``args``, prepares the
    ``_wc_target`` working copy (relative to the current directory) and
    changes into it. Then either:
    * normal mode: finds the starting source revision, exports its contents
      into the working copy and commits that as the initial import; or
    * continue mode (``options.cont_from_break``): rebuilds the source-rev ->
      target-rev map from the history already replayed into the target.
    Every later source revision, up to the source's current revision, is
    then replayed via pull_svn_rev().

    ``options`` must provide ``keep_author``, ``cont_from_break`` and
    ``svn_rev``.

    Returns None. Exceptions raised while replaying revisions are printed,
    and the working copy is cleaned up and reverted, instead of being
    propagated. Raises RuntimeError when no starting revision can be found,
    or when continue mode finds no previously replayed history.
    """
    source_url = args.pop(0).rstrip("/")
    target_url = args.pop(0).rstrip("/")
    keep_author = bool(options.keep_author)

    # Make sure that both the source and target URL's are valid
    source_info = svnclient.get_svn_info(source_url)
    assert source_url.startswith(source_info['repos_url'])
    target_info = svnclient.get_svn_info(target_url)
    assert target_url.startswith(target_info['repos_url'])

    source_end_rev = source_info['revision']       # Last revision # for the source repo
    source_repos_url = source_info['repos_url']    # Base URL for the source repo, e.g. 'svn://svn.example.com/svn/repo'
    source_repos_uuid = source_info['repos_uuid']  # UUID for the source repo

    wc_target = os.path.abspath('_wc_target')
    rev_map = {}

    # Check out a working copy of target_url if needed. Outside of
    # continue-mode any existing working copy is discarded first.
    wc_exists = os.path.exists(wc_target)
    if wc_exists and not options.cont_from_break:
        shutil.rmtree(wc_target)
        wc_exists = False
    if not wc_exists:
        svnclient.svn_checkout(target_url, wc_target)
    os.chdir(wc_target)

    if not options.cont_from_break:
        # Get log entry for the SVN revision we will check out
        source_start_log = None
        if options.svn_rev:
            # If specify a rev, get log entry just before or at rev
            source_start_log = svnclient.get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
        else:
            # Otherwise, get log entry of branch creation
            # Note: Trying to use svnclient.get_first_svn_log_entry(source_url, 1, source_end_rev, False)
            # ends-up being *VERY* time-consuming on a repo with lots of revisions. Even though
            # the "svn log" call is passing --limit 1, it seems like that limit-filter is happening
            # _after_ svn has fetched the full log history. Instead, search the history in chunks
            # and write some progress to the screen.
            ui.status("Searching for start source revision (%s)...", source_url, level=ui.VERBOSE)
            rev = 1
            chunk_size = 1000
            while True:
                # The search runs against the *source* URL, so the upper
                # bound must be the source repo's last revision.
                chunk_end = min(rev + chunk_size - 1, source_end_rev)
                entries = svnclient.run_svn_log(source_url, rev, chunk_end, 1, get_changed_paths=False)
                if entries:
                    source_start_log = entries[0]
                    break
                ui.status("...%s...", rev)
                rev += chunk_size
                if rev > source_end_rev:
                    break
        if not source_start_log:
            raise RuntimeError("Unable to find first revision for source_url: %s" % source_url)

        # This is the revision we will start from for source_url
        source_start_rev = source_rev = int(source_start_log['revision'])
        ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        paths = run_svn(["list", "-r", source_rev, source_url+"@"+str(source_rev)])
        if len(paths) > 1:
            disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_url, source_rev, source_rev))
            ui.status("(Initial import)", level=ui.VERBOSE)
            for path_raw in paths.strip("\n").split("\n"):
                # For each top-level file/folder...
                if not path_raw:
                    continue
                # Directories have a trailing slash in the "svn list" output
                path_is_dir = path_raw.endswith("/")
                path = path_raw.rstrip('/') if path_is_dir else path_raw
                if path_is_dir and not os.path.exists(path):
                    os.makedirs(path)
                ui.status(" A %s", source_url[len(source_repos_url):]+"/"+path, level=ui.VERBOSE)
                run_svn(["export", "--force", "-r", source_rev, source_url+"/"+path+"@"+str(source_rev), path])
                run_svn(["add", path])
            # Build source-tracking revprop's
            target_revprops = gen_tracking_revprops(source_repos_uuid, source_url, source_rev)
            target_rev = commit_from_svn_log_entry(source_start_log,
                                                   keep_author=keep_author,
                                                   target_revprops=target_revprops)
            if target_rev:
                set_rev_map(rev_map, source_rev, target_rev)
    else:
        # Re-build the rev_map based on any already-replayed history in target_url
        rev_map = build_rev_map(target_url, source_info)
        if not rev_map:
            raise RuntimeError("Called with continue-mode, but no already-replayed history found in target repo: %s" % target_url)
        # Resume from the source revision that maps to the highest target revision.
        source_start_rev = int(max(rev_map, key=rev_map.get))
        assert source_start_rev
        ui.status("Continuing from source revision %s.", source_start_rev, level=ui.VERBOSE)

    commit_count = 0
    # Compare (major, minor) as integers: a float comparison would read
    # "1.10" as 1.1 and wrongly treat SVN 1.10+ as older than 1.7.
    svn_vers_t = svnclient.get_svn_client_version()
    svn_has_pristines = tuple(int(part) for part in svn_vers_t[0:2]) >= (1, 7)

    # Load SVN log starting from source_start_rev + 1
    it_log_entries = svnclient.iter_svn_log_entries(source_url, source_start_rev+1, source_end_rev, get_revprops=True)

    try:
        for log_entry in it_log_entries:
            # Replay this revision from source_url into target_url
            target_rev = pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url,
                                      target_url, rev_map, keep_author)
            # Update our target working-copy, to ensure everything says it's at the new HEAD revision
            run_svn(["update"])
            commit_count += 1
            # Run "svn cleanup" every 100 commits if SVN 1.7+, to clean-up orphaned ".svn/pristines/*"
            if svn_has_pristines and (commit_count % 100 == 0):
                run_svn(["cleanup"])
            # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
            if target_rev:
                source_rev = log_entry['revision']
                set_rev_map(rev_map, source_rev, target_rev)

    except KeyboardInterrupt:
        print("\nStopped by user.")
        run_svn(["cleanup"])
        full_svn_revert()
    except Exception:
        # Report the failure and leave the working copy clean. SystemExit is
        # deliberately not caught here, so an explicit exit still propagates.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        run_svn(["cleanup"])
        print(run_svn(["status"]))
        full_svn_revert()
    finally:
        run_svn(["update"])
        print("\nFinished!")
778
def main():
    """
    Command-line entry point; must be callable without arguments.

    Builds the option parser, parses the command line via run_parser(),
    requires exactly two positional arguments (source_url and target_url),
    and returns whatever real_main() returns.
    """
    usage_text = """Usage: %prog [OPTIONS] source_url target_url

Replicate (replay) history from one SVN repository to another. Maintain
logical ancestry wherever possible, so that 'svn log' on the replayed
repo will correctly follow file/folder renames.

== Examples ==
Create a copy of only /trunk from source repo, starting at r5000
$ svnadmin create /svn/target
$ svn mkdir -m 'Add trunk' file:///svn/target/trunk
$ svn2svn -av -r 5000 http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target
2. The first commit to http://server/source/trunk at/after r5000 will be
exported & added into _wc_target
3. All revisions affecting http://server/source/trunk (starting at r5000)
will be replayed to _wc_target. Any add/copy/move/replaces that are
copy-from'd some path outside of /trunk (e.g. files renamed on a /branch
and branch was merged into /trunk) will correctly maintain logical
ancestry where possible.

Use continue-mode (-c) to pick-up where the last run left-off
$ svn2svn -avc http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target, if not already
checked-out
2. All new revisions affecting http://server/source/trunk starting from
the last replayed revision to file:///svn/target/trunk (based on the
svn2svn:* revprops) will be replayed to _wc_target, maintaining all
logical ancestry where possible."""
    cli = OptionParser(usage_text)

    # Tool-specific options, registered in display order; the common ones
    # (--version, --help, --verbose, --debug) are added by run_parser().
    option_table = (
        (("-r", "--revision"),
         dict(type="int", dest="svn_rev", metavar="REV",
              help="initial SVN revision to start source_url replay")),
        (("-a", "--keep-author"),
         dict(action="store_true", dest="keep_author",
              help="maintain original Author info from source repo")),
        (("-c", "--continue"),
         dict(action="store_true", dest="cont_from_break",
              help="continue from previous break")),
    )
    for flags, settings in option_table:
        cli.add_option(*flags, **settings)

    parsed_options, positional = run_parser(cli)
    if len(positional) != 2:
        display_parser_error(cli, "incorrect number of arguments")
    return real_main(parsed_options, positional)
820
821
if __name__ == "__main__":
    # Run the CLI; a falsy result from main() (e.g. None) becomes exit status 0.
    exit_status = main() or 0
    sys.exit(exit_status)