]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/run/svn2svn.py
No need to defer "svn remove" actions anymore, due to earlier rewrites
[svn2svn.git] / svn2svn / run / svn2svn.py
1 """
2 Replicate (replay) changesets from one SVN repository to another:
3 * Maintains full logical history (e.g. uses "svn copy" for renames).
4 * Maintains original commit messages.
5 * Optionally maintain source author info. (Only supported if accessing
6 target SVN repo via file://)
7 * Cannot maintain original commit date, but appends original commit date
8 for each commit message: "Date: %d".
9 * Optionally run an external shell script before each replayed commit
10 to give the ability to dynamically exclude or modify files as part
11 of the replay.
12
13 License: GPLv3, same as hgsvn (https://bitbucket.org/andialbrecht/hgsvn)
14 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
15 (Inspired by http://code.google.com/p/svn2svn/, and uses code for hgsvn
16 for SVN client handling)
17 """
18
19 from .. import base_version, full_version
20 from .. import ui
21 from .. import svnclient
22 from ..shell import run_svn
23 from ..errors import (ExternalCommandFailed, UnsupportedSVNAction)
24
25 import sys
26 import os
27 import time
28 import traceback
29 import shutil
30 import operator
31 from optparse import OptionParser,OptionGroup
32 from datetime import datetime
33
def commit_from_svn_log_entry(log_entry, files=None, keep_author=False, target_revprops=[]):
    """
    Run "svn commit" in the current working copy, reusing the metadata of the
    given source log entry.

    'log_entry' is the source log-entry dict; its 'date', 'message', 'author'
      and 'revprops' keys are read.
    'files' is an optional sequence of paths to limit the commit to.
    'keep_author' passes the source author via "--username" rather than
      appending an "Author:" line to the commit message.
    'target_revprops' is a list of {'name':..., 'value':...} dicts to set on the
      new revision; they override same-named revprops from the source entry.

    Returns the committed revision as a string, "" if the command produced
    output without a "Committed revision" line, or None if there was no output.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # datetime.fromtimestamp() renders the commit time in the local timezone.
    # (Use "%d 0" % timestamp instead if UTC commit times are preferred.)
    commit_date = str(datetime.fromtimestamp(int(log_entry['date'])))
    message = log_entry['message'] + "\nDate: " + commit_date
    if keep_author:
        svn_args = ["commit", "--force-log", "-m", message, "--username", log_entry['author']]
    else:
        svn_args = ["commit", "--force-log", "-m", message + "\nAuthor: " + log_entry['author']]

    # Source revprops first, then the target-specific ones so that they win.
    merged_revprops = {}
    for prop in list(log_entry['revprops'] or []) + list(target_revprops or []):
        merged_revprops[prop['name']] = prop['value']
    for name, value in merged_revprops.items():
        svn_args.extend(["--with-revprop", "%s=%s" % (name, str(value))])

    if files:
        svn_args.extend(files)

    output = run_svn(svn_args)
    if not output:
        return None

    # Pick the new revision number out of the "Committed revision N." line.
    marker = 'Committed revision '
    rev = ""
    for line in output.strip("\n").split("\n"):
        if line[:len(marker)] == marker:
            rev = line[len(marker):].rstrip('.')
            break
    if rev:
        ui.status("Committed revision %s.", rev)
    return rev
74
def full_svn_revert():
    """
    Do an "svn revert" and proactively remove any extra files in the working copy.

    Operates on the current directory: reverts recursively, then deletes every
    path that "svn status" reports as unversioned ("?").
    """
    run_svn(["revert", "--recursive", "."])
    output = run_svn(["status"])
    if not output:
        return
    for line in output.strip("\n").split("\n"):
        # Only unversioned items need manual cleanup; startswith() also copes
        # with blank lines, which line[0] would not.
        if not line.startswith("?"):
            continue
        path = line[4:].strip(" ")
        if not path:
            continue
        if os.path.islink(path) or os.path.isfile(path):
            # Symlinks are unlinked rather than followed: shutil.rmtree() refuses
            # to operate on a symlink, and a broken link is neither file nor dir.
            os.remove(path)
        elif os.path.isdir(path):
            shutil.rmtree(path)
90
def gen_tracking_revprops(source_repos_uuid, source_url, source_rev):
    """
    Build the list of svn2svn-specific source-tracking revprops, as
    {'name': ..., 'value': ...} dicts in uuid / url / rev order.
    """
    tracking_fields = (
        ('svn2svn:source_uuid', source_repos_uuid),
        ('svn2svn:source_url', source_url),
        ('svn2svn:source_rev', source_rev),
    )
    return [{'name': name, 'value': value} for name, value in tracking_fields]
99
def in_svn(p, require_in_repo=False, prefix=""):
    """
    Report whether the given file/folder is tracked by Subversion, based on
    the first entry returned by svnclient.get_svn_status() for that path.

    Looking for ".svn" directories is not reliable (SVN 1.7+ working copies
    only have a single top-level ".svn"), hence the status query.

    'p' is the working-copy path to check.
    'require_in_repo' makes locally-added paths (status 'added', or no
      revision) count as not tracked.
    'prefix' is prepended to the debug status message.
    """
    entries = svnclient.get_svn_status(p, no_recursive=True)
    if not entries:
        return False
    entry = entries[0]
    if require_in_repo and (entry['status'] == 'added' or entry['revision'] is None):
        # Caller needs the path to exist in the repository, and this one is
        # only scheduled locally.
        tracked = False
    elif entry['status'] == 'deleted':
        # Paths scheduled for deletion are not considered under source-control.
        tracked = False
    else:
        # Normal entries and locally added/copied ones count as tracked.
        tracked = (entry['type'] == 'normal'
                   or entry['status'] == 'added'
                   or entry['copied'] == 'true')
    ui.status(prefix + ">> in_svn('%s', require_in_repo=%s) --> %s", p, str(require_in_repo), str(tracked), level=ui.DEBUG, color='GREEN')
    return tracked
121
def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix = ""):
    """
    Given a source path, walk the SVN history backwards to inspect the ancestry of
    that path, seeing if it traces back to base_path. Build an array of copyfrom_path
    and copyfrom_revision pairs for each of the "svn copies". If we find a copyfrom_path
    which starts with base_path (e.g. we crawled back to the initial branch-copy from
    trunk), then return the collection of ancestor paths. Otherwise, copyfrom_path has
    no ancestry compared to base_path.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'svn_repos_url' is the full URL to the root of the SVN repository,
      e.g. 'file:///path/to/repo'
    'base_path' is the path in the SVN repo to the target path we're trying to
      trace ancestry back to, e.g. 'trunk'.
    'source_path' is the path in the SVN repo to the source path to start checking
      ancestry at, e.g. 'branches/fix1/projectA/file1.txt'.
      (full_path = svn_repos_url+base_path+"/"+path_offset)
    'source_rev' is the revision to start walking the history of source_path backwards from.
    'prefix' is prepended to every debug status message.

    Returns a list of {'path': ..., 'revision': ...} dicts. The first element is
    base_path+"/"+source_path at source_rev, followed by one element per copy that
    was followed (the copy-from path and revision). Returns an empty list when no
    copy was followed or the chain ended in a delete or a plain add/replace.

    Raises UnsupportedSVNAction for a changed-path action that fails the 'MARD' check.
    """
    ui.status(prefix + ">> find_svn_ancestors: Start: (%s) source_path: %s base_path: %s",
        svn_repos_url, source_path+"@"+str(source_rev), base_path, level=ui.DEBUG, color='YELLOW')
    done = False
    # working_path/working_rev track where we currently are in the copy chain.
    working_path = base_path+"/"+source_path
    working_rev = source_rev
    first_iter_done = False
    # One entry per copy we follow; reset to [] when the chain turns out to be a dead end.
    ancestors_temp = []
    while not done:
        # Get the first "svn log" entry for this path (relative to @rev)
        ui.status(prefix + ">> find_svn_ancestors: %s", svn_repos_url + working_path+"@"+str(working_rev), level=ui.DEBUG, color='YELLOW')
        log_entry = svnclient.get_first_svn_log_entry(svn_repos_url + working_path+"@"+str(working_rev), 1, working_rev, True)
        if not log_entry:
            ui.status(prefix + ">> find_svn_ancestors: Done: no log_entry", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        # If we found a copy-from case which matches our base_path, we're done.
        # ...but only if we've at least tried to search for the first copy-from path.
        # (On the first pass working_path always starts with base_path, hence the flag.)
        if first_iter_done and working_path.startswith(base_path):
            ui.status(prefix + ">> find_svn_ancestors: Done: Found working_path.startswith(base_path) and first_iter_done=True", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        first_iter_done = True
        # Search for any actions on our target path (or parent paths).
        # NOTE(review): "path in working_path" is a substring test, so any changed path
        # whose text occurs anywhere inside working_path matches, not only true parent
        # directories -- confirm this is intended.
        changed_paths_temp = []
        for d in log_entry['changed_paths']:
            path = d['path']
            if path in working_path:
                changed_paths_temp.append({'path': path, 'data': d})
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the chain and this path has no ancestry back to base_path.
            ui.status(prefix + ">> find_svn_ancestors: Done: No matching changed_paths", level=ui.DEBUG, color='YELLOW')
            done = True
            continue
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
        # Find the action for our working_path in this revision. Use a loop to check in reverse order,
        # so that if the target file/folder is "M" but has a parent folder with an "A" copy-from,
        # we still find that parent's copy-from.
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            # NOTE(review): this is a substring test against 'MARD', so '' or 'MA' would
            # also pass -- confirm actions are always a single character.
            action = d['action']
            if action not in 'MARD':
                raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                    % (log_entry['revision'], action))
            ui.status(prefix + "> %s %s%s", action, path,
                (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                level=ui.DEBUG, color='YELLOW')
            if action == 'D':
                # If file/folder was deleted, it has no ancestor
                ancestors_temp = []
                ui.status(prefix + ">> find_svn_ancestors: Done: deleted", level=ui.DEBUG, color='YELLOW')
                done = True
                break
            if action in 'RA':
                # If file/folder was added/replaced but not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    ancestors_temp = []
                    ui.status(prefix + ">> find_svn_ancestors: Done: %s with no copyfrom_path",
                        "Added" if action == "A" else "Replaced",
                        level=ui.DEBUG, color='YELLOW')
                    done = True
                    break
                # Else, file/folder was added/replaced and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                ui.status(prefix + ">> find_svn_ancestors: Found copy-from (action=%s): %s --> %s",
                    action, path, d['copyfrom_path']+"@"+str(d['copyfrom_revision']),
                    level=ui.DEBUG, color='YELLOW')
                ancestors_temp.append({'path': path, 'revision': log_entry['revision'],
                    'copyfrom_path': d['copyfrom_path'], 'copyfrom_rev': d['copyfrom_revision']})
                # NOTE(review): str.replace() rewrites every occurrence of d['path'] inside
                # working_path, not just the leading one.
                working_path = working_path.replace(d['path'], d['copyfrom_path'])
                working_rev = d['copyfrom_revision']
                # Follow the copy and keep on searching
                break
    ancestors = []
    if ancestors_temp:
        # Replay the recorded copies from the starting path to build the chain of
        # (path, revision) pairs, newest first.
        ancestors.append({'path': base_path+"/"+source_path, 'revision': source_rev})
        working_path = base_path+"/"+source_path
        for idx in range(len(ancestors_temp)):
            d = ancestors_temp[idx]
            working_path = working_path.replace(d['path'], d['copyfrom_path'])
            working_rev = d['copyfrom_rev']
            ancestors.append({'path': working_path, 'revision': working_rev})
        # max_len is only used to column-align the debug output below.
        max_len = 0
        for idx in range(len(ancestors)):
            d = ancestors[idx]
            max_len = max(max_len, len(d['path']+"@"+str(d['revision'])))
        ui.status(prefix + ">> find_svn_ancestors: Found parent ancestors:", level=ui.DEBUG, color='YELLOW_B')
        for idx in range(len(ancestors)-1):
            d = ancestors[idx]
            d_next = ancestors[idx+1]
            ui.status(prefix + " [%s] %s <-- %s", idx,
                str(d['path']+"@"+str(d['revision'])).ljust(max_len),
                str(d_next['path']+"@"+str(d_next['revision'])).ljust(max_len),
                level=ui.DEBUG, color='YELLOW')
    else:
        ui.status(prefix + ">> find_svn_ancestors: No ancestor-chain found: %s",
            svn_repos_url+base_path+"/"+source_path+"@"+str(source_rev), level=ui.DEBUG, color='YELLOW')
    return ancestors
242
def get_rev_map(rev_map, source_rev, prefix):
    """
    Look up the target-repo rev # equivalent to the given source-repo rev #.

    Walks downwards from source_rev to 1 and returns (as an int) the mapping for
    the highest source revision <= source_rev that is present in rev_map, or
    None when no such revision is mapped.
    """
    ui.status(prefix + ">> get_rev_map(%s)", source_rev, level=ui.DEBUG, color='GREEN')
    candidate = int(source_rev)
    while candidate > 0:
        is_mapped = candidate in rev_map
        ui.status(prefix + ">> get_rev_map: rev=%s in_rev_map=%s", candidate, str(is_mapped), level=ui.DEBUG, color='BLACK_B')
        if is_mapped:
            return int(rev_map[candidate])
        candidate -= 1
    # Fell off the bottom of the rev_map: nothing at or below source_rev is mapped.
    return None
255
def set_rev_map(rev_map, source_rev, target_rev):
    """
    Record in rev_map that source-repo revision source_rev was replayed as
    target-repo revision target_rev. Both are stored as ints.
    """
    ui.status(">> set_rev_map: source_rev=%s target_rev=%s", source_rev, target_rev, level=ui.DEBUG, color='GREEN')
    mapped_rev = int(target_rev)
    rev_map[int(source_rev)] = mapped_rev
259
def build_rev_map(target_url, source_info):
    """
    Check for any already-replayed history from source_url (source_info) and
    build the mapping-table of source_rev -> target_rev.

    'target_url' is the URL whose log is scanned from revision 1 to HEAD.
    'source_info' is the source info dict; its 'repos_uuid' and 'url' keys are
      compared against the svn2svn:* tracking revprops of each target revision.

    Returns a dict of int source rev -> int target rev. Target revisions that
    lack any of the three tracking revprops, or that were replayed from a
    different source, are ignored.
    """
    required = ('svn2svn:source_uuid', 'svn2svn:source_url', 'svn2svn:source_rev')
    rev_map = {}
    ui.status("Rebuilding rev_map...", level=ui.VERBOSE)
    it_log_entries = svnclient.iter_svn_log_entries(target_url, 1, 'HEAD', get_changed_paths=False, get_revprops=True)
    for log_entry in it_log_entries:
        if not log_entry['revprops']:
            continue
        tracking = {}
        for v in log_entry['revprops']:
            if v['name'].startswith('svn2svn:'):
                tracking[v['name']] = v['value']
        # A revision may carry revprops without being an svn2svn replay (or with
        # only part of the tracking set); skip it instead of raising KeyError.
        if any(name not in tracking for name in required):
            continue
        if tracking['svn2svn:source_uuid'] != source_info['repos_uuid']:
            continue
        if tracking['svn2svn:source_url'] != source_info['url']:
            continue
        set_rev_map(rev_map, tracking['svn2svn:source_rev'], log_entry['revision'])
    return rev_map
281
def get_svn_dirlist(svn_path, svn_rev = ""):
    """
    Get the child entries of the given folder path, as printed by "svn list"
    (one list element per output line).

    'svn_path' is the working-copy path or URL to list.
    'svn_rev' optionally pins the listing to a revision; it is used both as
      the "-r" value and as the "@" peg revision.

    Returns a list of entry names, or [] when the command output is at most
    one character long.
    """
    target = svn_path
    command = ["list"]
    if svn_rev:
        command.extend(["-r", svn_rev])
        target = target + "@" + str(svn_rev)
    command.append(target)
    listing = run_svn(command, no_fail=True)
    if len(listing) > 1:
        return listing.strip("\n").split("\n")
    return []
295
def add_path(paths, path_offset):
    """
    Helper function to add a path to a list but only if that path, or a parent
    path, isn't already in the list. Assumes that paths are added roughly in
    breadth-first order.

    'paths' is the list to update; it is modified in place.
    'path_offset' is the "/"-separated path to add.

    Returns the same 'paths' list object.
    """
    candidate = path_offset.rstrip("/")
    for p in paths:
        parent = p.rstrip("/")
        # Compare on whole path components: "foo" covers "foo" and "foo/bar",
        # but must not swallow the sibling "foobar". An empty entry is the
        # root and therefore covers everything.
        if not parent or candidate == parent or candidate.startswith(parent + "/"):
            return paths
    paths.append(path_offset)
    return paths
310
def do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev,
               parent_copyfrom_path="", parent_copyfrom_rev="", export_paths=None,
               rev_map=None, is_dir=False, prefix=""):
    """
    Given the add'd source path, replay the "svn add/copy" commands to correctly
    track renames across copy-from's.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2   (rename folder)
    4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt
       (rename file inside renamed folder)
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'path_offset' is the offset from source_base to the file to check ancestry for,
      e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'target_url' is the full URL to the target path in the target repository.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'parent_copyfrom_path' and 'parent_copyfrom_rev' is the copy-from path of the parent
      directory, when being called recursively by do_svn_add_dir().
    'export_paths' is the list of path_offset's that we've deferred running "svn export" on.
      It is updated in place; when omitted, a throw-away list is used.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s. Defaults to an empty mapping.
    'is_dir' is whether path_offset is a directory (rather than a file).
    'prefix' is prepended to every debug status message.
    """
    # Fresh containers per call: the former "{}" defaults were shared between calls,
    # and a dict cannot be passed to add_path(), which appends to a list.
    if export_paths is None:
        export_paths = []
    if rev_map is None:
        rev_map = {}
    source_base = source_url[len(source_repos_url):]
    ui.status(prefix + ">> do_svn_add: %s %s", source_base+"/"+path_offset+"@"+str(source_rev),
              " (parent-copyfrom: "+parent_copyfrom_path+"@"+str(parent_copyfrom_rev)+")" if parent_copyfrom_path else "",
              level=ui.DEBUG, color='GREEN')
    # Check if the given path has ancestors which chain back to the current source_base
    ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset, source_rev, prefix+" ")
    # ancestors[n] is the original (pre-branch-copy) trunk path.
    # ancestors[n-1] is the first commit on the new branch.
    copyfrom_path = ancestors[-1]['path'] if ancestors else ""
    copyfrom_rev = ancestors[-1]['revision'] if ancestors else ""
    tgt_rev = None
    if ancestors:
        # The copy-from path has ancestry back to source_url.
        ui.status(prefix + ">> do_svn_add: Check copy-from: Found parent: %s", copyfrom_path+"@"+str(copyfrom_rev),
                  level=ui.DEBUG, color='GREEN', bold=True)
        # Map the copyfrom_rev (source repo) to the equivalent target repo rev #. This can
        # return None in the case where copyfrom_rev is *before* our source_start_rev.
        tgt_rev = get_rev_map(rev_map, copyfrom_rev, prefix+" ")
        ui.status(prefix + ">> do_svn_add: get_rev_map: %s (source) -> %s (target)", copyfrom_rev, tgt_rev, level=ui.DEBUG, color='GREEN')
    else:
        ui.status(prefix + ">> do_svn_add: Check copy-from: No ancestor chain found.", level=ui.DEBUG, color='GREEN')
    if tgt_rev:
        # Check if this path_offset in the target WC already has this ancestry, in which
        # case there's no need to run the "svn copy" (again).
        path_in_svn = in_svn(path_offset, prefix=prefix+" ")
        log_entry = svnclient.get_last_svn_log_entry(path_offset, 1, 'HEAD', get_changed_paths=False) if in_svn(path_offset, require_in_repo=True, prefix=prefix+" ") else []
        if (not log_entry or (log_entry['revision'] != tgt_rev)):
            copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
            ui.status(prefix + ">> do_svn_add: svn_copy: Copy-from: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " copyfrom: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " p_copyfrom: %s", parent_copyfrom_path+"@"+str(parent_copyfrom_rev) if parent_copyfrom_path else "", level=ui.DEBUG, color='GREEN')
            if path_in_svn and \
               ((parent_copyfrom_path and copyfrom_path.startswith(parent_copyfrom_path)) and \
                (parent_copyfrom_rev and copyfrom_rev == parent_copyfrom_rev)):
                # When being called recursively, if this child entry has the same ancestor as
                # the parent, then no need to try to run another "svn copy".
                ui.status(prefix + ">> do_svn_add: svn_copy: Same ancestry as parent: %s",
                          parent_copyfrom_path+"@"+str(parent_copyfrom_rev),level=ui.DEBUG, color='GREEN')
            else:
                # Copy this path from the equivalent path+rev in the target repo, to create the
                # equivalent history.
                if parent_copyfrom_path:
                    # If we have a parent copy-from path, we mis-match that so display a status
                    # message describing the action we're mimic'ing. If path_in_svn, then this
                    # is logically a "replace" rather than an "add".
                    ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), source_base+"/"+path_offset, ancestors[1]['path']+"@"+str(copyfrom_rev), level=ui.VERBOSE)
                if path_in_svn:
                    # If local file is already under version-control, then this is a replace.
                    ui.status(prefix + ">> do_svn_add: pre-copy: local path already exists: %s", path_offset, level=ui.DEBUG, color='GREEN')
                    run_svn(["remove", "--force", path_offset])
                run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset])
                # Export the final version of this file/folder from the source repo, to make
                # sure we're up-to-date.
                add_path(export_paths, path_offset)
        else:
            ui.status(prefix + ">> do_svn_add: Skipped 'svn copy': %s", path_offset, level=ui.DEBUG, color='GREEN')
    else:
        # Else, either this copy-from path has no ancestry back to source_url OR copyfrom_rev comes
        # before our initial source_start_rev (i.e. tgt_rev == None), so can't do a "svn copy".
        # Create (parent) directory if needed.
        # TODO: This is (nearly) a duplicate of code in process_svn_log_entry(). Should this be
        # split-out to a shared helper?
        p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or '.'
        if not os.path.exists(p_path):
            run_svn(["mkdir", p_path])
        if not in_svn(path_offset, prefix=prefix+" "):
            if is_dir:
                # Export the final version of all files in this folder.
                add_path(export_paths, path_offset)
            else:
                # Export the final version of this file. We *need* to do this before running
                # the "svn add", even if we end-up re-exporting this file again via export_paths.
                run_svn(["export", "--force", "-r", source_rev,
                         source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])
            # If not already under version-control, then "svn add" this file/folder.
            run_svn(["add", "--parents", path_offset])
        # TODO: Need to copy SVN properties from source repos
    if is_dir:
        # For any folders that we process, process any child contents, so that we correctly
        # replay copies/replaces/etc.
        do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url,
                       copyfrom_path, copyfrom_rev, export_paths, rev_map, prefix+" ")
428
def do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url,
                   parent_copyfrom_path, parent_copyfrom_rev, export_paths, rev_map, prefix=""):
    """
    Replay the contents of an added folder: run do_svn_add() for every entry
    listed under path_offset in the source repo at source_rev, then "svn remove"
    every locally-listed entry that the source listing does not contain.

    The parameters have the same meaning as for do_svn_add();
    'parent_copyfrom_path'/'parent_copyfrom_rev' describe the copy-from of
    path_offset itself and are handed down to each child.
    """
    source_base = source_url[len(source_repos_url):]
    # Get the directory contents, to compare between the local WC (target_url) vs. the remote repo (source_url)
    # TODO: local_entries won't include add'd paths because "svn ls" lists the contents of the
    # associated remote repo folder. (Is this a problem?)
    local_entries = get_svn_dirlist(path_offset)
    remote_entries = get_svn_dirlist(source_url+"/"+path_offset, source_rev)
    ui.status(prefix + ">> do_svn_add_dir: paths_local: %s", str(local_entries), level=ui.DEBUG, color='GREEN')
    ui.status(prefix + ">> do_svn_add_dir: paths_remote: %s", str(remote_entries), level=ui.DEBUG, color='GREEN')

    # Update files/folders which exist in remote but not local
    for entry in remote_entries:
        # Folder entries are listed with a trailing "/".
        entry_is_dir = entry[-1] == "/"
        if entry_is_dir:
            child_offset = path_offset + "/" + entry.rstrip('/')
        else:
            child_offset = path_offset + "/" + entry
        do_svn_add(source_repos_url, source_url, child_offset, target_url, source_rev,
                   parent_copyfrom_path, parent_copyfrom_rev, export_paths,
                   rev_map, entry_is_dir, prefix+" ")

    # Remove files/folders which exist in local but not remote
    for entry in local_entries:
        if entry in remote_entries:
            continue
        ui.status(" %s %s", 'D', source_base+"/"+path_offset+"/"+entry, level=ui.VERBOSE)
        run_svn(["remove", "--force", path_offset+"/"+entry])
    # TODO: Does this handle deleted folders too? Wouldn't want to have a case
    # where we only delete all files from folder but leave orphaned folder around.
453
def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url,
                          rev_map, commit_paths=None, prefix=""):
    """
    Process SVN changes from the given log entry.
    Returns array of all the paths in the working-copy that were changed,
    i.e. the paths which need to be "svn commit".

    'log_entry' is the array structure built by parse_svn_log_xml().
    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s.
    'commit_paths' is the working list of specific paths which changes to pass
      to the final "svn commit". A list passed in is extended in place (at most
      100 entries are collected); when omitted a new list is created.
    'prefix' is prepended to every debug status message.

    Raises UnsupportedSVNAction for a changed-path action other than M/A/R/D.
    """
    # A fresh list per call: a "[]" default would accumulate paths across calls.
    if commit_paths is None:
        commit_paths = []
    export_paths = []
    # Get the relative offset of source_url based on source_repos_url
    # e.g. '/branches/bug123'
    source_base = source_url[len(source_repos_url):]
    source_rev = log_entry['revision']
    ui.status(prefix + ">> process_svn_log_entry: %s", source_url+"@"+str(source_rev), level=ui.DEBUG, color='GREEN')
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not path.startswith(source_base + "/"):
            # Ignore changed files that are not part of this subdir
            if path != source_base:
                ui.status(prefix + ">> process_svn_log_entry: Unrelated path: %s (base: %s)", path, source_base, level=ui.DEBUG, color='GREEN')
            continue
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path. Compare against whole action codes; a
        # substring test on 'MARD' would also let '' or 'MA' through.
        action = d['action']
        if action not in ('M', 'A', 'R', 'D'):
            raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                                       % (source_rev, action))
        ui.status(" %s %s%s", action, d['path'],
                  (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                  level=ui.VERBOSE)

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        # But, if we gather too many paths to commit, then the caller wipes
        # commit_paths and ends up doing a commit at the root of the working-copy.
        if len(commit_paths) < 100:
            commit_paths.append(path_offset)

        # Special-handling for replace's
        if action == 'R':
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            path_is_dir = d['kind'] == 'dir'
            if d['copyfrom_revision']:
                # This "add" was a copy from another URL in the source repos.
                do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev,
                           "", "", export_paths, rev_map, path_is_dir, prefix+" ")
            else:
                # Else just "svn export" the files from the source repo and "svn add" them.
                # Create (parent) directory if needed
                p_path = path_offset if path_is_dir else os.path.dirname(path_offset).strip() or '.'
                if not os.path.exists(p_path):
                    run_svn(["mkdir", p_path])
                # Export the entire added tree.
                if path_is_dir:
                    # For directories, defer the (recursive) "svn export". Might have a
                    # situation in a branch merge where the entry in the svn-log is a
                    # non-copy-from'd "add" but there are child contents (that we haven't
                    # gotten to yet in log_entry) that are copy-from's. When we try do
                    # the "svn copy" later on in do_svn_add() for those copy-from'd paths,
                    # having pre-existing (svn-add'd) contents creates some trouble.
                    # Instead, just create the stub folders ("svn mkdir" above) and defer
                    # exporting the final file-state until the end.
                    add_path(export_paths, path_offset)
                else:
                    # Export the final version of this file. We *need* to do this before running
                    # the "svn add", even if we end-up re-exporting this file again via export_paths.
                    run_svn(["export", "--force", "-r", source_rev,
                             source_url+"/"+path_offset+"@"+str(source_rev), path_offset])
                if not in_svn(path_offset, prefix=prefix+" "):
                    # Need to use in_svn here to handle cases where client committed the parent
                    # folder and each indiv sub-folder.
                    run_svn(["add", "--parents", path_offset])
                # TODO: Need to copy SVN properties from source repos

        elif action == 'D':
            run_svn(["remove", "--force", path_offset])

        elif action == 'M':
            # TODO: Is "svn merge -c" correct here? Should this just be an "svn export" plus
            # proplist updating?
            run_svn(["merge", "-c", source_rev, "--non-recursive",
                     "--non-interactive", "--accept=theirs-full",
                     source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

        else:
            # Defensive only: the validation above already rejects other actions.
            # Uses an exception class this module imports (SVNError is not imported).
            raise UnsupportedSVNAction("Internal Error: process_svn_log_entry: Unhandled 'action' value: '%s'"
                                       % action)

    # Export the final version of all add'd paths from source_url
    for path_offset in export_paths:
        run_svn(["export", "--force", "-r", source_rev,
                 source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

    return commit_paths
576
def disp_svn_log_summary(log_entry):
    """
    Print a short "svn log"-style summary of the given log entry: a blank line,
    a "rREV | author | date" header, the commit message and a separator line.
    """
    separator = "------------------------------------------------------------------------"
    ui.status("")
    # The date is rendered in the local timezone, ISO format with a space separator.
    commit_time = datetime.fromtimestamp(int(log_entry['date']))
    ui.status("r%s | %s | %s",
              log_entry['revision'],
              log_entry['author'],
              str(commit_time.isoformat(' ')))
    ui.status(log_entry['message'])
    ui.status(separator)
585
def pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url, target_url, rev_map, keep_author=False):
    """
    Replay the SVN changes from the given source log entry into the working
    copy and commit them, tagged with the svn2svn source-tracking revprops.
    Returns the value returned by commit_from_svn_log_entry() (the new SVN
    revision).
    """
    disp_svn_log_summary(log_entry)
    source_rev = log_entry['revision']

    # Process all the paths in this log entry; the list is filled in place.
    changed_paths = []
    process_svn_log_entry(log_entry, source_repos_url, source_url, target_url,
                          rev_map, changed_paths)
    # If we had too many individual paths to commit, wipe the list and just commit at
    # the root of the working copy.
    paths_to_commit = changed_paths if len(changed_paths) <= 99 else []

    # Build source-tracking revprop's
    tracking_revprops = gen_tracking_revprops(source_repos_uuid, source_url, source_rev)
    return commit_from_svn_log_entry(log_entry, paths_to_commit,
                                     keep_author=keep_author,
                                     target_revprops=tracking_revprops)
607
def run_parser(parser):
    """
    Register the common options (--version, -h/--help, -v/--verbose, --debug)
    on an OptionParser instance and parse the command line.

    Handles --help and --version itself (prints and exits with status 0);
    otherwise pushes the parsed options into ui.update_config() and returns
    the (options, args) pair.
    """
    parser.add_option("", "--version", dest="show_version", action="store_true",
                      help="show version and exit")
    # Replace the built-in --help so it is handled below.
    parser.remove_option("--help")
    parser.add_option("-h", "--help", dest="show_help", action="store_true",
                      help="show this help message and exit")
    # -v and --debug both write to "verbosity".
    parser.add_option("-v", "--verbose", dest="verbosity", const=ui.VERBOSE,
                      default=10, action="store_const",
                      help="enable additional output")
    parser.add_option("--debug", dest="verbosity", const=ui.DEBUG,
                      action="store_const",
                      help="enable debugging output")
    parsed = parser.parse_args()
    options = parsed[0]
    if options.show_help:
        parser.print_help()
        sys.exit(0)
    if options.show_version:
        script_name = os.path.basename(sys.argv[0])
        print("%s %s" % (script_name, full_version))
        sys.exit(0)
    ui.update_config(options)
    return parsed[0], parsed[1]
633
def display_parser_error(parser, message):
    """
    Report an options error: print "error: <message>", a blank line and the
    parser's help text, then exit with status 1.
    """
    print("error: %s" % (message,))
    print("")
    parser.print_help()
    sys.exit(1)
642
def _find_source_start_log(source_url, source_end_rev):
    """
    Return the earliest log entry for source_url, or None if there is none.

    The history is scanned in fixed-size revision chunks, from r1 up to
    source_end_rev, printing progress between chunks. A single
    "svn log --limit 1" over the full range was found to be very slow on
    repositories with many revisions.
    """
    ui.status("Searching for start source revision (%s)...", source_url, level=ui.VERBOSE)
    rev = 1
    chunk_size = 1000
    while True:
        # The search runs against the *source* URL, so it must be bounded by
        # the source repo's last revision (not the target's).
        chunk_end = min(rev + chunk_size - 1, source_end_rev)
        entries = svnclient.run_svn_log(source_url, rev, chunk_end, 1, get_changed_paths=False)
        if entries:
            return entries[0]
        ui.status("...%s...", rev)
        rev = rev + chunk_size
        if rev > source_end_rev:
            return None


def real_main(options, args):
    """
    Replay the history of a source SVN URL into a target SVN URL.

    args must hold exactly two items, source_url and target_url (both are
    popped from the list). The following attributes of options are read:
    keep_author, cont_from_break and svn_rev.

    A working copy of target_url is kept in ./_wc_target (relative to the
    current directory at call time) and the process chdir's into it. Unless
    running in continue-mode, any existing ./_wc_target is deleted first and
    the contents of source_url at the start revision are exported, added and
    committed as the initial import. Every later source revision is then
    replayed via pull_svn_rev().

    Raises RuntimeError if no start revision can be found for source_url, or
    if continue-mode is requested but the target has no replayed history.
    Errors during the replay loop are printed and the working copy is
    reverted; they are not re-raised.
    """
    source_url = args.pop(0).rstrip("/")
    target_url = args.pop(0).rstrip("/")
    keep_author = bool(options.keep_author)

    # Make sure that both the source and target URL's are valid
    source_info = svnclient.get_svn_info(source_url)
    assert source_url.startswith(source_info['repos_url'])
    target_info = svnclient.get_svn_info(target_url)
    assert target_url.startswith(target_info['repos_url'])

    source_end_rev = source_info['revision']       # Last revision # for the source repo
    source_repos_url = source_info['repos_url']    # Base URL for the source repo, e.g. 'svn://svn.example.com/svn/repo'
    source_repos_uuid = source_info['repos_uuid']  # UUID for the source repo

    wc_target = os.path.abspath('_wc_target')
    rev_map = {}

    # Check out a working copy of target_url if needed
    wc_exists = os.path.exists(wc_target)
    if wc_exists and not options.cont_from_break:
        shutil.rmtree(wc_target)
        wc_exists = False
    if not wc_exists:
        svnclient.svn_checkout(target_url, wc_target)
    os.chdir(wc_target)

    if not options.cont_from_break:
        # Get log entry for the SVN revision we will check out
        if options.svn_rev:
            # If specify a rev, get log entry just before or at rev
            source_start_log = svnclient.get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
        else:
            # Otherwise, get log entry of branch creation
            source_start_log = _find_source_start_log(source_url, source_end_rev)
        if not source_start_log:
            raise RuntimeError("Unable to find first revision for source_url: %s" % source_url)

        # This is the revision we will start from for source_url
        source_start_rev = source_rev = int(source_start_log['revision'])
        ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        paths = run_svn(["list", "-r", source_rev, source_url+"@"+str(source_rev)])
        if len(paths)>1:
            disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_url, source_rev, source_rev))
            ui.status("(Initial import)", level=ui.VERBOSE)
            paths = paths.strip("\n").split("\n")
            for path_raw in paths:
                # For each top-level file/folder...
                if not path_raw:
                    continue
                # Directories have a trailing slash in the "svn list" output
                path_is_dir = True if path_raw[-1] == "/" else False
                path = path_raw.rstrip('/') if path_is_dir else path_raw
                if path_is_dir and not os.path.exists(path):
                    os.makedirs(path)
                ui.status(" A %s", source_url[len(source_repos_url):]+"/"+path, level=ui.VERBOSE)
                run_svn(["export", "--force", "-r" , source_rev, source_url+"/"+path+"@"+str(source_rev), path])
                run_svn(["add", path])
            target_revprops = gen_tracking_revprops(source_repos_uuid, source_url, source_rev) # Build source-tracking revprop's
            target_rev = commit_from_svn_log_entry(source_start_log, \
                keep_author=keep_author, target_revprops=target_revprops)
            if target_rev:
                set_rev_map(rev_map, source_rev, target_rev)
    else:
        # Re-build the rev_map based on any already-replayed history in target_url
        rev_map = build_rev_map(target_url, source_info)
        if not rev_map:
            raise RuntimeError("Called with continue-mode, but no already-replayed history found in target repo: %s" % target_url)
        # Resume from the source revision that maps to the highest target revision
        source_start_rev = int(max(rev_map, key=rev_map.get))
        assert source_start_rev
        ui.status("Continuing from source revision %s.", source_start_rev, level=ui.VERBOSE)

    commit_count = 0
    svn_vers_t = svnclient.get_svn_client_version()
    # Compare (major, minor) as integers: a float built from "1.10" would be
    # 1.1 and wrongly sort below 1.7.
    svn_is_1_7_plus = tuple(int(part) for part in svn_vers_t[0:2]) >= (1, 7)

    # Load SVN log starting from source_start_rev + 1
    it_log_entries = svnclient.iter_svn_log_entries(source_url, source_start_rev+1, source_end_rev, get_revprops=True)

    try:
        for log_entry in it_log_entries:
            # Replay this revision from source_url into target_url
            target_rev = pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url,
                                      target_url, rev_map, keep_author)
            # Update our target working-copy, to ensure everything says it's at the new HEAD revision
            run_svn(["update"])
            commit_count += 1
            # Run "svn cleanup" every 100 commits if SVN 1.7+, to clean-up orphaned ".svn/pristines/*"
            if svn_is_1_7_plus and (commit_count % 100 == 0):
                run_svn(["cleanup"])
            # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
            if target_rev:
                source_rev = log_entry['revision']
                set_rev_map(rev_map, source_rev, target_rev)

    except KeyboardInterrupt:
        print("\nStopped by user.")
        run_svn(["cleanup"])
        full_svn_revert()
    except:
        # Deliberate catch-all: report the failure, then leave the working
        # copy clean so that a later continue-mode run can pick up from here.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        run_svn(["cleanup"])
        print(run_svn(["status"]))
        full_svn_revert()
    finally:
        run_svn(["update"])
        print("\nFinished!")
775
def main():
    """
    Command-line entry point; must be callable without arguments.

    Builds the option parser (the replay-specific options here, the common
    ones via run_parser()), requires exactly two positional arguments
    (source_url and target_url), and returns the result of real_main().
    """
    usage = """Usage: %prog [OPTIONS] source_url target_url

Replicate (replay) history from one SVN repository to another. Maintain
logical ancestry wherever possible, so that 'svn log' on the replayed
repo will correctly follow file/folder renames.

== Examples ==
Create a copy of only /trunk from source repo, starting at r5000
$ svnadmin create /svn/target
$ svn mkdir -m 'Add trunk' file:///svn/target/trunk
$ svn2svn -av -r 5000 http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target
2. The first commit to http://server/source/trunk at/after r5000 will be
exported & added into _wc_target
3. All revisions affecting http://server/source/trunk (starting at r5000)
will be replayed to _wc_target. Any add/copy/move/replaces that are
copy-from'd some path outside of /trunk (e.g. files renamed on a /branch
and branch was merged into /trunk) will correctly maintain logical
ancestry where possible.

Use continue-mode (-c) to pick-up where the last run left-off
$ svn2svn -avc http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target, if not already
checked-out
2. All new revisions affecting http://server/source/trunk starting from
the last replayed revision to file:///svn/target/trunk (based on the
svn2svn:* revprops) will be replayed to _wc_target, maintaining all
logical ancestry where possible."""
    opt_parser = OptionParser(usage)

    # Options specific to the replay command, registered in display order.
    replay_options = (
        (("-r", "--revision"),
         dict(type="int", dest="svn_rev", metavar="REV",
              help="initial SVN revision to start source_url replay")),
        (("-a", "--keep-author"),
         dict(action="store_true", dest="keep_author",
              help="maintain original Author info from source repo")),
        (("-c", "--continue"),
         dict(action="store_true", dest="cont_from_break",
              help="continue from previous break")),
    )
    for flag_names, settings in replay_options:
        opt_parser.add_option(*flag_names, **settings)

    options, args = run_parser(opt_parser)
    if len(args) != 2:
        # Prints the usage text and terminates the process.
        display_parser_error(opt_parser, "incorrect number of arguments")
    return real_main(options, args)
817
818
if __name__ == "__main__":
    # Run as a script: use main()'s result as the exit status, defaulting to
    # 0 when it returns nothing.
    exit_status = main() or 0
    sys.exit(exit_status)