]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/run/svn2svn.py
WIP on verify
[svn2svn.git] / svn2svn / run / svn2svn.py
1 """
2 Replicate (replay) changesets from one SVN repository to another.
3 """
4
5 from .. import base_version, full_version
6 from .. import ui
7 from .. import svnclient
8 from ..shell import run_svn
9 from ..errors import (ExternalCommandFailed, UnsupportedSVNAction, InternalError, VerificationError)
10 from parse import HelpFormatter
11
12 import sys
13 import os
14 import time
15 import traceback
16 import shutil
17 import operator
18 import optparse
19 from datetime import datetime
20
# The known SVN action abbreviations, from "svn log". Used as a membership
# test against each changed-path's 'action' value.
_valid_svn_actions = "MARD"

# Module-level variables/parameters.
# The source_* values and target_url are assigned by real_main(); rev_map is
# reset by build_rev_map() and filled-in by set_rev_map().
source_url = ""        # URL to source path in source SVN repo, e.g. 'http://server/svn/source/trunk'
source_repos_url = ""  # URL to root of source SVN repo, e.g. 'http://server/svn/source'
source_base = ""       # Relative path of source_url in source SVN repo, e.g. '/trunk'
source_repos_uuid = "" # UUID of source SVN repo
target_url =""         # URL to target path in target SVN repo, e.g. 'file:///svn/repo_target/trunk'
rev_map = {}           # The running mapping-table dictionary for source_url rev #'s -> target_url rev #'s
30
def commit_from_svn_log_entry(log_entry, options, commit_paths=None, target_revprops=None):
    """
    Run "svn commit" in the current working copy for the given SVN log entry.

    'log_entry' supplies the 'date', 'message', 'author' and 'revprops' values
      used to build the commit message and revprops.
    'options' is read for 'keep_author' and 'dry_run'.
    'commit_paths' is an optional list of changed paths to pass to "svn commit".
    'target_revprops' is an optional list of extra {'name':..., 'value':...}
      revprops; these win over same-named revprops from the source revision.

    Returns the revision scraped from the "svn commit" output: None for a
    dry-run or when there was no output, "" when the output had no
    "Committed revision" line.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # Display the _wc_target "svn status" info if running in -vv (or higher) mode
    if ui.get_level() >= ui.EXTRA:
        ui.status(">> commit_from_svn_log_entry: Pre-commit _wc_target status:", level=ui.EXTRA, color='CYAN')
        ui.status(run_svn(["status"]), level=ui.EXTRA, color='CYAN')

    # datetime.fromtimestamp() renders the commit time in the local timezone.
    svn_date = str(datetime.fromtimestamp(int(log_entry['date'])))

    # Build the log message. The original author is either passed through as
    # the committing username or recorded in the message text.
    cmd = ["commit", "--force-log", "-m"]
    if options.keep_author:
        cmd += [log_entry['message'] + "\nDate: " + svn_date,
                "--username", log_entry['author']]
    else:
        cmd += [log_entry['message'] + "\nDate: " + svn_date + "\nAuthor: " + log_entry['author']]

    # Merge the revprops: first the ones carried forward from the source
    # revision, then any extra ones wanted on the target repo commits.
    merged_revprops = {}
    for prop_list in (log_entry['revprops'], target_revprops):
        for prop in prop_list or ():
            merged_revprops[prop['name']] = prop['value']
    for name in merged_revprops:
        cmd += ["--with-revprop", "%s=%s" % (name, str(merged_revprops[name]))]

    # Only pass explicit paths when there is not an excessive number of them.
    # Otherwise pass nothing, so we commit at the root of the working-copy.
    if commit_paths and len(commit_paths) < 100:
        cmd += list(commit_paths)

    rev = None
    if not options.dry_run:
        # Run the "svn commit" command, and screen-scrape the target_rev value (if any)
        output = run_svn(cmd)
        if output:
            marker = 'Committed revision '
            rev = ""
            for out_line in output.strip("\n").split("\n"):
                if out_line.startswith(marker):
                    rev = out_line[len(marker):].rstrip('.')
                    break
            if rev:
                ui.status("Committed revision %s.", rev)
    return rev
82
def full_svn_revert():
    """
    Do an "svn revert" and proactively remove any extra files in the working copy.

    Runs a recursive "svn revert" on the current directory, then deletes every
    path which "svn status" still reports as unversioned ("?").
    """
    run_svn(["revert", "--recursive", "."])
    output = run_svn(["status"])
    if not output:
        return
    for line in output.strip("\n").split("\n"):
        # Use startswith() rather than line[0]: the status output can contain
        # blank lines, and indexing an empty string raises an IndexError.
        if not line.startswith("?"):
            continue
        path = line[4:].strip(" ")
        if os.path.isfile(path):
            os.remove(path)
        elif os.path.isdir(path):
            shutil.rmtree(path)
98
def gen_tracking_revprops(source_rev):
    """
    Return the svn2svn source-tracking revprops for 'source_rev'.

    The result is a list of {'name': ..., 'value': ...} dictionaries holding
    the module-level source_repos_uuid and source_url plus the given revision.
    """
    tracking = (('svn2svn:source_uuid', source_repos_uuid),
                ('svn2svn:source_url', source_url),
                ('svn2svn:source_rev', source_rev))
    return [{'name': name, 'value': value} for name, value in tracking]
107
def in_svn(p, require_in_repo=False, prefix=""):
    """
    Check if a given file/folder is being tracked by Subversion.

    Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
    With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of
    the working-copy, so use "svn status" to check the status of the file/folder.

    'p' is the working-copy path to check.
    'require_in_repo' makes locally-added paths (status 'added', or no revision)
      count as not tracked.
    'prefix' is prepended to the debug status message.

    Returns False when there is no status entry for 'p'.
    """
    entries = svnclient.get_svn_status(p, no_recursive=True)
    if not entries:
        return False
    entry = entries[0]
    only_local = entry['status'] == 'added' or entry['revision'] is None
    if require_in_repo and only_local:
        # Caller requires this path to be in the SVN repo, so don't report
        # paths that are only locally-added.
        tracked = False
    elif entry['status'] == 'deleted':
        # Files tracked as deleted in the WC are not under source-control.
        tracked = False
    else:
        # Files which are locally added/copied do count as under source-control.
        tracked = (entry['type'] == 'normal' or entry['status'] == 'added' or entry['copied'] == 'true')
    ui.status(prefix + ">> in_svn('%s', require_in_repo=%s) --> %s", p, str(require_in_repo), str(tracked), level=ui.DEBUG, color='GREEN')
    return tracked
129
def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix = ""):
    """
    Given a source path, walk the SVN history backwards to inspect the ancestry of
    that path, seeing if it traces back to base_path. Build an array of copyfrom_path
    and copyfrom_revision pairs for each of the "svn copies". If we find a copyfrom_path
    which starts with base_path (e.g. we crawled back to the initial branch-copy from
    trunk), then return the collection of ancestor paths. Otherwise, the path has no
    ancestry compared to base_path.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'svn_repos_url' is the full URL to the root of the SVN repository,
      e.g. 'file:///path/to/repo'
    'base_path' is the path in the SVN repo to the target path we're trying to
      trace ancestry back to, e.g. '/trunk'.
    'source_path' is the path, relative to base_path, to start checking ancestry at,
      e.g. 'projectA/file1.txt'.
      (full_path = svn_repos_url+base_path+"/"+source_path)
    'source_rev' is the revision to start walking the history of source_path backwards from.
    'prefix' is prepended to all debug status messages.

    Returns a list of {'path': ..., 'revision': ...} dictionaries. The first entry is
    base_path+"/"+source_path at source_rev, followed by one entry per copy-from that
    was followed. Returns an empty list when no copy-from was found.

    Raises UnsupportedSVNAction for a changed-path action not in _valid_svn_actions.
    """
    ui.status(prefix + ">> find_svn_ancestors: Start: (%s) source_path: %s base_path: %s",
              svn_repos_url, source_path+"@"+str(source_rev), base_path, level=ui.DEBUG, color='YELLOW')
    done = False
    working_path = base_path+"/"+source_path
    working_rev = source_rev
    first_iter_done = False
    ancestors_temp = []
    while not done:
        # Get the first "svn log" entry for this path (relative to @rev)
        ui.status(prefix + ">> find_svn_ancestors: %s", svn_repos_url + working_path+"@"+str(working_rev), level=ui.DEBUG, color='YELLOW')
        log_entry = svnclient.get_first_svn_log_entry(svn_repos_url + working_path+"@"+str(working_rev), 1, working_rev, True)
        if not log_entry:
            ui.status(prefix + ">> find_svn_ancestors: Done: no log_entry", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        # If we found a copy-from case which matches our base_path, we're done.
        # ...but only if we've at least tried to search for the first copy-from path.
        if first_iter_done and working_path.startswith(base_path):
            ui.status(prefix + ">> find_svn_ancestors: Done: Found working_path.startswith(base_path) and first_iter_done=True", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        first_iter_done = True
        # Search for any actions on our target path (or parent paths).
        # Match on whole path components: a plain substring test would also
        # pick up siblings such as '/trunk/Proj' for '/trunk/Proj1/file.txt'.
        changed_paths_temp = []
        for d in log_entry['changed_paths']:
            path = d['path']
            if working_path == path or working_path.startswith(path.rstrip("/") + "/"):
                changed_paths_temp.append({'path': path, 'data': d})
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the chain and this path has no ancestry back to base_path.
            ui.status(prefix + ">> find_svn_ancestors: Done: No matching changed_paths", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
        # Find the action for our working_path in this revision. Use a loop to check in reverse order,
        # so that we handle the case where the target file/folder is "M" but has a parent folder
        # with an "A" copy-from.
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            action = d['action']
            if action not in _valid_svn_actions:
                raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                    % (log_entry['revision'], action))
            ui.status(prefix + "> %s %s%s", action, path,
                      (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                      level=ui.DEBUG, color='YELLOW')
            if action == 'D':
                # If file/folder was deleted, it has no ancestor
                ancestors_temp = []
                ui.status(prefix + ">> find_svn_ancestors: Done: deleted", level=ui.DEBUG, color='YELLOW')
                done = True
                break
            if action in 'RA':
                # If file/folder was added/replaced but not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    ancestors_temp = []
                    ui.status(prefix + ">> find_svn_ancestors: Done: %s with no copyfrom_path",
                              "Added" if action == "A" else "Replaced",
                              level=ui.DEBUG, color='YELLOW')
                    done = True
                    break
                # Else, file/folder was added/replaced and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                ui.status(prefix + ">> find_svn_ancestors: Found copy-from (action=%s): %s --> %s",
                          action, path, d['copyfrom_path']+"@"+str(d['copyfrom_revision']),
                          level=ui.DEBUG, color='YELLOW')
                ancestors_temp.append({'path': path, 'revision': log_entry['revision'],
                                       'copyfrom_path': d['copyfrom_path'], 'copyfrom_rev': d['copyfrom_revision']})
                # 'path' is a leading prefix of working_path, so only swap that
                # first occurrence; later identical segments must stay untouched.
                working_path = working_path.replace(d['path'], d['copyfrom_path'], 1)
                working_rev = d['copyfrom_revision']
                # Follow the copy and keep on searching
                break
    ancestors = []
    if ancestors_temp:
        # Replay the copy-from chain from the starting path, to build the
        # list of ancestor path@rev pairs.
        ancestors.append({'path': base_path+"/"+source_path, 'revision': source_rev})
        working_path = base_path+"/"+source_path
        for d in ancestors_temp:
            working_path = working_path.replace(d['path'], d['copyfrom_path'], 1)
            working_rev = d['copyfrom_rev']
            ancestors.append({'path': working_path, 'revision': working_rev})
        if ui.get_level() >= ui.DEBUG:
            max_len = 0
            for d in ancestors:
                max_len = max(max_len, len(d['path']+"@"+str(d['revision'])))
            ui.status(prefix + ">> find_svn_ancestors: Found parent ancestors:", level=ui.DEBUG, color='YELLOW_B')
            for idx in range(len(ancestors)-1):
                d = ancestors[idx]
                d_next = ancestors[idx+1]
                ui.status(prefix + " [%s] %s <-- %s", idx,
                          str(d['path']+"@"+str(d['revision'])).ljust(max_len),
                          str(d_next['path']+"@"+str(d_next['revision'])).ljust(max_len),
                          level=ui.DEBUG, color='YELLOW')
    else:
        ui.status(prefix + ">> find_svn_ancestors: No ancestor-chain found: %s",
                  svn_repos_url+base_path+"/"+source_path+"@"+str(source_rev), level=ui.DEBUG, color='YELLOW')
    return ancestors
251
def get_rev_map(source_rev, prefix):
    """
    Find the equivalent rev # in the target repo for the given rev # from the source repo.

    Looks for the highest rev_map entry less-than-or-equal-to 'source_rev' and
    returns its target rev # as an int, or None when there is no such entry.
    'prefix' is prepended to the debug status messages.
    """
    ui.status(prefix + ">> get_rev_map(%s)", source_rev, level=ui.DEBUG, color='GREEN')
    candidate = int(source_rev)
    while candidate > 0:
        ui.status(prefix + ">> get_rev_map: rev=%s in_rev_map=%s", candidate, str(candidate in rev_map), level=ui.DEBUG, color='BLACK_B')
        if candidate in rev_map:
            return int(rev_map[candidate])
        candidate -= 1
    # Else, we fell off the bottom of the rev_map. Ruh-roh...
    return None
264
def set_rev_map(source_rev, target_rev):
    """
    Record in rev_map that 'source_rev' (source repo) was replayed as
    'target_rev' (target repo). Both values are stored as ints.
    """
    ui.status(">> set_rev_map: source_rev=%s target_rev=%s", source_rev, target_rev, level=ui.DEBUG, color='GREEN')
    target_key = int(target_rev)
    source_key = int(source_rev)
    rev_map[source_key] = target_key
269
def build_rev_map(target_url, source_info):
    """
    Check for any already-replayed history from source_url (source_info) and
    build the mapping-table of source_rev -> target_rev.

    'target_url' is the URL whose "svn log" (rev 1 through HEAD) is scanned.
    'source_info' is read for 'repos_uuid' and 'url'; only target revisions
      whose svn2svn:source_uuid and svn2svn:source_url revprops match those
      values are added to the (reset) module-level rev_map.
    """
    global rev_map
    rev_map = {}
    ui.status("Rebuilding rev_map...", level=ui.VERBOSE)
    it_log_entries = svnclient.iter_svn_log_entries(target_url, 1, 'HEAD', get_changed_paths=False, get_revprops=True)
    for log_entry in it_log_entries:
        if not log_entry['revprops']:
            continue
        revprops = {}
        for v in log_entry['revprops']:
            if v['name'].startswith('svn2svn:'):
                revprops[v['name']] = v['value']
        # A revision can carry revprops without any of the svn2svn tracking
        # ones (or with only some of them); skip those rather than
        # failing with a KeyError.
        if 'svn2svn:source_rev' not in revprops:
            continue
        if revprops.get('svn2svn:source_uuid') == source_info['repos_uuid'] and \
           revprops.get('svn2svn:source_url') == source_info['url']:
            set_rev_map(revprops['svn2svn:source_rev'], log_entry['revision'])
291
def get_svn_dirlist(svn_path, svn_rev = ""):
    """
    Get the "svn list" output for the given folder path, as a list of lines.

    'svn_path' is the path/URL to list.
    'svn_rev' optionally pins the listing to a revision (passed both as
      "-r" and as a "@rev" suffix on the path).

    No recursive flag is passed to "svn list". Returns an empty list when the
    command output is at most one character long.
    """
    target = svn_path
    cmd = ["list"]
    if svn_rev:
        cmd += ["-r", svn_rev]
        target = target + "@" + str(svn_rev)
    cmd.append(target)
    # no_fail=True is passed through to run_svn() as-is.
    listing = run_svn(cmd, no_fail=True)
    if len(listing) > 1:
        return listing.strip("\n").split("\n")
    return []
305
def path_in_list(paths, path):
    """
    Return True if 'path' is already covered by an entry in 'paths', i.e. it
    equals an entry or is located underneath one.

    The match is done on whole path components, so that 'projectAB/file.txt'
    is not considered to be covered by an entry 'projectA'. An empty-string
    entry covers every path.
    """
    for p in paths:
        if not p or path == p or path.startswith(p.rstrip("/") + "/"):
            return True
    return False
311
def add_path(paths, path):
    """
    Append 'path' to the 'paths' list, unless path_in_list() reports that
    the list already covers it.
    """
    if path_in_list(paths, path):
        return
    paths.append(path)
315
def do_svn_add(path_offset, source_rev, parent_copyfrom_path="", parent_copyfrom_rev="", \
               export_paths=None, is_dir = False, prefix = ""):
    """
    Given the add'd source path, replay the "svn add/copy" commands to correctly
    track renames across copy-from's.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2  " Rename folder
    4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt  " Rename file inside renamed folder
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'path_offset' is the offset from source_base to the file to check ancestry for,
      e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'parent_copyfrom_path' and 'parent_copyfrom_rev' is the copy-from path of the parent
      directory, when being called recursively by do_svn_add_dir().
    'export_paths' is the list of path_offset's that we've deferred running "svn export" on.
      When omitted, a fresh list is used for this call.
    'is_dir' is whether path_offset is a directory (rather than a file).
    'prefix' is prepended to all debug status messages.
    """
    if export_paths is None:
        # Never share a default container between calls. (It also has to be a
        # list, since add_path() appends to it.)
        export_paths = []
    ui.status(prefix + ">> do_svn_add: %s %s", source_base+"/"+path_offset+"@"+str(source_rev),
              " (parent-copyfrom: "+parent_copyfrom_path+"@"+str(parent_copyfrom_rev)+")" if parent_copyfrom_path else "",
              level=ui.DEBUG, color='GREEN')
    # Check if the given path has ancestors which chain back to the current source_base
    ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset, source_rev, prefix+" ")
    # The last entry of ancestors is the oldest copy-from path@rev in the chain.
    copyfrom_path = ancestors[-1]['path'] if ancestors else ""
    copyfrom_rev = ancestors[-1]['revision'] if ancestors else ""
    tgt_rev = None
    if ancestors:
        # The copy-from path has ancestry back to source_url.
        ui.status(prefix + ">> do_svn_add: Check copy-from: Found parent: %s", copyfrom_path+"@"+str(copyfrom_rev),
                  level=ui.DEBUG, color='GREEN', bold=True)
        # Map the copyfrom_rev (source repo) to the equivalent target repo rev #. This can
        # return None in the case where copyfrom_rev is *before* our source_start_rev.
        tgt_rev = get_rev_map(copyfrom_rev, prefix+" ")
        ui.status(prefix + ">> do_svn_add: get_rev_map: %s (source) -> %s (target)", copyfrom_rev, tgt_rev, level=ui.DEBUG, color='GREEN')
    else:
        ui.status(prefix + ">> do_svn_add: Check copy-from: No ancestor chain found.", level=ui.DEBUG, color='GREEN')
    if ancestors and tgt_rev:
        # Check if this path_offset in the target WC already has this ancestry, in which
        # case there's no need to run the "svn copy" (again).
        path_in_svn = in_svn(path_offset, prefix=prefix+" ")
        log_entry = svnclient.get_last_svn_log_entry(path_offset, 1, 'HEAD', get_changed_paths=False) if in_svn(path_offset, require_in_repo=True, prefix=prefix+" ") else []
        if (not log_entry or (log_entry['revision'] != tgt_rev)):
            copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
            ui.status(prefix + ">> do_svn_add: svn_copy: Copy-from: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " copyfrom: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " p_copyfrom: %s", parent_copyfrom_path+"@"+str(parent_copyfrom_rev) if parent_copyfrom_path else "", level=ui.DEBUG, color='GREEN')
            if path_in_svn and \
               ((parent_copyfrom_path and copyfrom_path.startswith(parent_copyfrom_path)) and \
                (parent_copyfrom_rev and copyfrom_rev == parent_copyfrom_rev)):
                # When being called recursively, if this child entry has the same ancestor as
                # the parent, then no need to try to run another "svn copy".
                ui.status(prefix + ">> do_svn_add: svn_copy: Same ancestry as parent: %s",
                          parent_copyfrom_path+"@"+str(parent_copyfrom_rev),level=ui.DEBUG, color='GREEN')
            else:
                # Copy this path from the equivalent path+rev in the target repo, to create the
                # equivalent history.
                if parent_copyfrom_path:
                    # If we have a parent copy-from path, we mis-match that so display a status
                    # message describing the action we're mimic'ing. If path_in_svn, then this
                    # is logically a "replace" rather than an "add".
                    ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), source_base+"/"+path_offset, ancestors[1]['path']+"@"+str(copyfrom_rev), level=ui.VERBOSE)
                if path_in_svn:
                    # If local file is already under version-control, then this is a replace.
                    ui.status(prefix + ">> do_svn_add: pre-copy: local path already exists: %s", path_offset, level=ui.DEBUG, color='GREEN')
                    run_svn(["remove", "--force", path_offset])
                run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset])
                # Export the final version of this file/folder from the source repo, to make
                # sure we're up-to-date.
                add_path(export_paths, path_offset)
        else:
            ui.status(prefix + ">> do_svn_add: Skipped 'svn copy': %s", path_offset, level=ui.DEBUG, color='GREEN')
    else:
        # Else, either this copy-from path has no ancestry back to source_url OR copyfrom_rev comes
        # before our initial source_start_rev (i.e. tgt_rev == None), so can't do a "svn copy".
        # Create (parent) directory if needed.
        # TODO: This is (nearly) a duplicate of code in process_svn_log_entry(). Should this be
        #       split-out to a shared function?
        p_path = path_offset if is_dir else (os.path.dirname(path_offset).strip() or '.')
        if not os.path.exists(p_path):
            run_svn(["mkdir", p_path])
        if not in_svn(path_offset, prefix=prefix+" "):
            if is_dir:
                # Export the final version of all files in this folder.
                add_path(export_paths, path_offset)
            else:
                # Export the final version of this file. We *need* to do this before running
                # the "svn add", even if we end-up re-exporting this file again via export_paths.
                run_svn(["export", "--force", "-r", source_rev,
                         source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])
            # If not already under version-control, then "svn add" this file/folder.
            run_svn(["add", "--parents", path_offset])
        # TODO: Need to copy SVN properties from source repos
    if is_dir:
        # For any folders that we process, process any child contents, so that we correctly
        # replay copies/replaces/etc.
        do_svn_add_dir(path_offset, source_rev, copyfrom_path, copyfrom_rev, export_paths, prefix+" ")
425
def do_svn_add_dir(path_offset, source_rev, parent_copyfrom_path, parent_copyfrom_rev, \
                   export_paths, prefix=""):
    """
    Replay the child contents of an add'd folder.

    Compares the "svn list" of 'path_offset' in the local WC (target_url)
    against the one of the same folder under source_url at 'source_rev'. Every
    remote entry is handed to do_svn_add(), and every local entry which is
    missing from the remote listing gets an "svn remove --force".

    The remaining arguments are passed through to do_svn_add() unchanged.
    """
    # TODO: paths_local won't include add'd paths because "svn ls" lists the contents of the
    #       associated remote repo folder. (Is this a problem?)
    paths_local = get_svn_dirlist(path_offset)
    paths_remote = get_svn_dirlist(source_url+"/"+path_offset, source_rev)
    ui.status(prefix + ">> do_svn_add_dir: paths_local: %s", str(paths_local), level=ui.DEBUG, color='GREEN')
    ui.status(prefix + ">> do_svn_add_dir: paths_remote: %s", str(paths_remote), level=ui.DEBUG, color='GREEN')
    # Update files/folders which exist in remote but not local
    for entry in paths_remote:
        # "svn list" marks folders with a trailing slash.
        entry_is_dir = entry[-1] == "/"
        child_name = entry.rstrip('/') if entry_is_dir else entry
        do_svn_add(path_offset+"/"+child_name, source_rev, parent_copyfrom_path, parent_copyfrom_rev,
                   export_paths, entry_is_dir, prefix+" ")
    # Remove files/folders which exist in local but not remote
    for entry in paths_local:
        if entry in paths_remote:
            continue
        ui.status(" %s %s", 'D', source_base+"/"+path_offset+"/"+entry, level=ui.VERBOSE)
        run_svn(["remove", "--force", path_offset+"/"+entry])
        # TODO: Does this handle deleted folders too? Wouldn't want to have a case
        #       where we only delete all files from folder but leave orphaned folder around.
448
def _verify_svn_log_entry(log_entry, prefix = ""):
    """
    Verify that the pending changes in the working copy (current directory)
    match the changed-paths of the given source log entry.

    Raises VerificationError on the first mismatch that is found.
    """
    source_rev = log_entry['revision']
    wc_info = svnclient.get_svn_info(".")
    target_rev = wc_info['revision']
    for d in log_entry['changed_paths']:
        path = d['path']
        if not path.startswith(source_base + "/"):
            continue
        path_offset = path[len(source_base):].strip("/")
        action = d['action']
        assert len(d['kind'])>0
        path_is_dir = d['kind'] == 'dir'
        desc = " %s %s%s" % (action, d['path'], (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "")
        # The status list can be empty (in_svn() guards against that too), so
        # fall back to an entry which matches none of the expected states.
        entries = svnclient.get_svn_status(path_offset)
        status = entries[0] if entries else {'status': None, 'copied': None}
        ui.status(prefix + ">> process_svn_log_entry: verify: status: %s", str(status), level=ui.DEBUG, color='GREEN')
        if action not in _valid_svn_actions:
            continue
        if action == 'D':
            if os.path.exists(path_offset) and not path_is_dir:
                # Don't complain if (root) directory still exists. Root deleted directory
                # will be removed during the commit.
                raise VerificationError("Path exists in _wc_target:\n%s" % \
                    (desc))
            # Verify that _wc_target tracks this as a deletion
            if not status['status'] == "deleted":
                raise VerificationError("Path not scheduled for %s in _wc_target:\n%s" % \
                    ("delete", desc))
        elif action in 'AR':
            action_type = ("add" if action == "A" else "replace")
            status_status = ("added" if action == "A" else "replaced")
            if not os.path.exists(path_offset):
                raise VerificationError("Path doesn't exist in _wc_target:\n%s" % \
                    (desc))
            # Verify that _wc_target tracks this as an add/replace
            if not status['status'] == status_status:
                raise VerificationError("Path not scheduled for %s in _wc_target:\n%s" % \
                    (action_type, desc))
            source_ancestors = find_svn_ancestors(source_repos_url, source_base, \
                path_offset, source_rev, prefix+" ") if d['copyfrom_path'] \
                else []
            is_copied = (status['copied'] == 'true')
            if not source_ancestors:
                # For copyfrom=False, check that local path doesn't have any ancestry
                if is_copied:
                    raise VerificationError("Path copy-from'd in _wc_target:\n%s" % \
                        (desc))
            else:
                # For copyfrom=True, need to recursively check all child contents.
                if not is_copied:
                    raise VerificationError("Path not copy-from'd in _wc_target:\n%s" % \
                        (desc))
                # There are no module-level target_repos_url/target_base values, so derive
                # them from target_url the same way real_main() derives the source_* ones.
                target_info = svnclient.get_svn_info(target_url)
                target_repos_url = target_info['repos_url']
                target_base = target_url[len(target_repos_url):]
                target_ancestors = find_svn_ancestors(target_repos_url, target_base, \
                    path_offset, target_rev, prefix+" ")
                if not target_ancestors:
                    raise VerificationError("No ancestor-chain found in target for copy-from'd path:\n%s" % \
                        (desc))
                source_copyfrom_path = source_ancestors[-1]['path']
                source_copyfrom_offset = source_copyfrom_path[len(source_base):].strip('/')
                source_copyfrom_rev = source_ancestors[-1]['revision']
                target_copyfrom_path = target_ancestors[-1]['path']
                target_copyfrom_offset = target_copyfrom_path[len(target_base):].strip('/')
                target_copyfrom_rev = target_ancestors[-1]['revision']
                if source_copyfrom_offset != target_copyfrom_offset:
                    raise VerificationError("Path not copy-from'd same path_offset: source_copyfrom=%s, target_copyfrom=%s:\n%s" % \
                        (source_copyfrom_path+"@"+str(source_copyfrom_rev), target_copyfrom_path+"@"+str(target_copyfrom_rev), desc))
                # get_rev_map() takes (source_rev, prefix); it reads rev_map itself.
                source_tgt_rev = get_rev_map(source_copyfrom_rev, prefix+" ")
                if source_tgt_rev != target_copyfrom_rev:
                    raise VerificationError("Path not copy-from'd same revision: get_rev_map(%s)=%s, target_rev=%s\n%s" % \
                        (source_copyfrom_rev, source_tgt_rev, target_copyfrom_rev, desc))
                # TODO: Recursively check all child contents
        elif action == 'M':
            if not os.path.exists(path_offset):
                raise VerificationError("Path doesn't exist in _wc_target:\n%s" % \
                    (desc))
            # TODO: Is there anything to check for directories? Maybe properties?
            # TODO: Use md5sum to compare both the remote and local file?

def process_svn_log_entry(log_entry, options, commit_paths, prefix = ""):
    """
    Process SVN changes from the given log entry. Build an array (commit_paths)
    of the paths in the working-copy that were changed, i.e. the paths which
    we'll pass to "svn commit".

    'log_entry' is the source "svn log" entry; its 'revision' and 'changed_paths'
      are replayed into the working copy in the current directory.
    'options' is read for 'verify'; when set, the replayed working copy is
      checked and a VerificationError is raised on a mismatch.
    'commit_paths' is the list which each processed path_offset is added to.
    'prefix' is prepended to all debug status messages.

    Raises UnsupportedSVNAction for a changed-path action not in _valid_svn_actions.
    """
    export_paths = []
    source_rev = log_entry['revision']
    ui.status(prefix + ">> process_svn_log_entry: %s", source_url+"@"+str(source_rev), level=ui.DEBUG, color='GREEN')
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not path.startswith(source_base + "/"):
            # Ignore changed files that are not part of this subdir
            if path != source_base:
                ui.status(prefix + ">> process_svn_log_entry: Unrelated path: %s (base: %s)", path, source_base, level=ui.DEBUG, color='GREEN')
            continue
        assert len(d['kind'])>0
        path_is_dir = d['kind'] == 'dir'
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path
        action = d['action']
        if action not in _valid_svn_actions:
            raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                % (source_rev, action))
        ui.status(" %s %s%s", action, d['path'],
                  (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                  level=ui.VERBOSE)

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        add_path(commit_paths, path_offset)

        # Special-handling for replace's
        if action == 'R':
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            if d['copyfrom_revision']:
                # Handle cases where this "add" was a copy from another URL in the source repos
                do_svn_add(path_offset, source_rev, "", "", export_paths, path_is_dir, prefix+" ")
            else:
                # Else just "svn export" the files from the source repo and "svn add" them.
                # Create (parent) directory if needed
                p_path = path_offset if path_is_dir else (os.path.dirname(path_offset).strip() or '.')
                if not os.path.exists(p_path):
                    run_svn(["mkdir", p_path])
                # Export the entire added tree.
                if path_is_dir:
                    # For directories, defer the (recursive) "svn export". Might have a
                    # situation in a branch merge where the entry in the svn-log is a
                    # non-copy-from'd "add" but there are child contents (that we haven't
                    # gotten to yet in log_entry) that are copy-from's. When we try do
                    # the "svn copy" later on in do_svn_add() for those copy-from'd paths,
                    # having pre-existing (svn-add'd) contents creates some trouble.
                    # Instead, just create the stub folders ("svn mkdir" above) and defer
                    # exporting the final file-state until the end.
                    add_path(export_paths, path_offset)
                else:
                    # Export the final version of this file. We *need* to do this before running
                    # the "svn add", even if we end-up re-exporting this file again via export_paths.
                    run_svn(["export", "--force", "-r", source_rev,
                             source_url+"/"+path_offset+"@"+str(source_rev), path_offset])
                if not in_svn(path_offset, prefix=prefix+" "):
                    # Need to use in_svn here to handle cases where client committed the parent
                    # folder and each indiv sub-folder.
                    run_svn(["add", "--parents", path_offset])
                # TODO: Need to copy SVN properties from source repos

        elif action == 'D':
            run_svn(["remove", "--force", path_offset])

        elif action == 'M':
            # TODO: Is "svn merge -c" correct here? Should this just be an "svn export" plus
            #       proplist updating?
            run_svn(["merge", "-c", source_rev, "--non-recursive",
                     "--non-interactive", "--accept=theirs-full",
                     source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

        else:
            raise InternalError("Internal Error: process_svn_log_entry: Unhandled 'action' value: '%s'"
                % action)

    # Export the final version of all add'd paths from source_url
    for path_offset in export_paths:
        run_svn(["export", "--force", "-r", source_rev,
                 source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

    # Optionally verify that we replayed everything correctly
    if options.verify:
        _verify_svn_log_entry(log_entry, prefix)
637
def disp_svn_log_summary(log_entry):
    """
    Display a summary of the given log entry: a blank line, a
    "r<rev> | <author> | <date>" header, the log message and a separator line.
    """
    ui.status("")
    # fromtimestamp() renders the commit time in the local timezone.
    commit_time = datetime.fromtimestamp(int(log_entry['date']))
    ui.status("r%s | %s | %s",
              log_entry['revision'],
              log_entry['author'],
              str(commit_time.isoformat(' ')))
    ui.status(log_entry['message'])
    ui.status("------------------------------------------------------------------------")
646
def _find_first_source_log_entry(source_end_rev, chunk_size=1000):
    """
    Return the earliest log entry for source_url, or None if no entry exists
    in revisions 1..source_end_rev.

    The history is searched in chunks of chunk_size revisions, printing the
    chunk start revision as progress after every empty chunk.
    """
    rev = 1
    while rev <= source_end_rev:
        # Bound each chunk by the *source* repo's end revision: the history
        # being searched belongs to source_url, not to the target repo.
        chunk_end = min(rev + chunk_size - 1, source_end_rev)
        entries = svnclient.run_svn_log(source_url, rev, chunk_end, 1, get_changed_paths=False)
        if entries:
            return entries[0]
        ui.status("...%s...", rev)
        rev += chunk_size
    return None


def _svn_version_at_least(version, minimum):
    """
    Return True if the leading components of the version sequence are >= the
    minimum tuple, comparing component-wise as integers.

    Comparing integers avoids the float pitfall where "1.10" parses as 1.1
    and therefore sorts below 1.7.
    """
    return tuple(int(part) for part in version[:len(minimum)]) >= tuple(minimum)


def real_main(options, args):
    """
    Replay revisions from the source URL into the target URL through the
    ./_wc_target working copy.

    args is consumed in place: args[0] is the source URL and args[1] the
    target URL. options is the parsed command-line options object; this
    function reads cont_from_break, svn_rev, entries_proc_limit and dry_run.

    Without continue-mode, any existing _wc_target is removed, target_url is
    checked out, and the contents of source_url at the start revision are
    exported, added and committed as an initial import. With continue-mode,
    rev_map is rebuilt from the target and replay resumes after the source
    revision mapped to the highest target revision.

    Raises InternalError if no start revision can be found for source_url and
    RuntimeError if continue-mode finds no already-replayed history. Errors
    raised while replaying log entries are printed and followed by an
    "svn cleanup" plus full revert of the working copy; they are not re-raised.
    """
    global source_url, target_url, rev_map
    global source_repos_url, source_base, source_repos_uuid
    source_url = args.pop(0).rstrip("/")    # e.g. 'http://server/svn/source/trunk'
    target_url = args.pop(0).rstrip("/")    # e.g. 'file:///svn/target/trunk'
    ui.status("options: %s", str(options), level=ui.DEBUG, color='GREEN')

    # Make sure that both the source and target URL's are valid
    source_info = svnclient.get_svn_info(source_url)
    assert source_url.startswith(source_info['repos_url'])
    target_info = svnclient.get_svn_info(target_url)
    assert target_url.startswith(target_info['repos_url'])

    # Init global vars
    source_repos_url = source_info['repos_url']         # e.g. 'http://server/svn/source'
    source_base = source_url[len(source_repos_url):]    # e.g. '/trunk'
    source_repos_uuid = source_info['repos_uuid']

    source_end_rev = source_info['revision']    # Last revision # in the source repo
    wc_target = os.path.abspath('_wc_target')
    num_entries_proc = 0
    commit_count = 0
    source_rev = None
    target_rev = None

    # Check out a working copy of target_url if needed
    wc_exists = os.path.exists(wc_target)
    if wc_exists and not options.cont_from_break:
        shutil.rmtree(wc_target)
        wc_exists = False
    if not wc_exists:
        svnclient.svn_checkout(target_url, wc_target)
    os.chdir(wc_target)

    if not options.cont_from_break:
        # TODO: Warn user if trying to start (non-continue) into a non-empty target path?
        # Get log entry for the SVN revision we will check out
        if options.svn_rev:
            # If specify a rev, get log entry just before or at rev
            source_start_log = svnclient.get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
        else:
            # Otherwise, get log entry of branch creation
            # Note: Trying to use svnclient.get_first_svn_log_entry(source_url, 1, source_end_rev, False)
            #       ends-up being *VERY* time-consuming on a repo with lots of revisions. Even though
            #       the "svn log" call is passing --limit 1, it seems like that limit-filter is happening
            #       _after_ svn has fetched the full log history. Instead, search the history in chunks
            #       and write some progress to the screen.
            ui.status("Searching for start source revision (%s)...", source_url, level=ui.VERBOSE)
            source_start_log = _find_first_source_log_entry(source_end_rev)
        if not source_start_log:
            raise InternalError("Unable to find first revision for source_url: %s" % source_url)

        # This is the revision we will start from for source_url
        source_start_rev = source_rev = int(source_start_log['revision'])
        ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        paths = run_svn(["list", "-r", source_rev, source_url+"@"+str(source_rev)])
        if len(paths)>1:
            disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_url, source_rev, source_rev))
            ui.status("(Initial import)", level=ui.VERBOSE)
            for path_raw in paths.strip("\n").split("\n"):
                # For each top-level file/folder...
                if not path_raw:
                    continue
                # Directories have a trailing slash in the "svn list" output
                path_is_dir = path_raw.endswith("/")
                path = path_raw.rstrip('/') if path_is_dir else path_raw
                if path_is_dir and not os.path.exists(path):
                    os.makedirs(path)
                ui.status(" A %s", source_base+"/"+path, level=ui.VERBOSE)
                run_svn(["export", "--force", "-r" , source_rev, source_url+"/"+path+"@"+str(source_rev), path])
                run_svn(["add", path])
            # NOTE(review): the initial-import commit is assumed to belong to the
            # "there is something to import" branch -- confirm the nesting.
            num_entries_proc += 1
            target_revprops = gen_tracking_revprops(source_rev)   # Build source-tracking revprop's
            target_rev = commit_from_svn_log_entry(source_start_log, options, target_revprops=target_revprops)
            if target_rev:
                # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
                set_rev_map(source_rev, target_rev)
                # Update our target working-copy, to ensure everything says it's at the new HEAD revision
                run_svn(["update"])
                commit_count += 1
    else:
        # Re-build the rev_map based on any already-replayed history in target_url
        build_rev_map(target_url, source_info)
        if not rev_map:
            raise RuntimeError("Called with continue-mode, but no already-replayed history found in target repo: %s" % target_url)
        # Source revision which maps to the highest target revision
        source_start_rev = int(max(rev_map, key=rev_map.get))
        assert source_start_rev
        ui.status("Continuing from source revision %s.", source_start_rev, level=ui.VERBOSE)

    # "svn cleanup" is only run periodically for SVN 1.7+ clients (see below)
    periodic_cleanup = _svn_version_at_least(svnclient.get_svn_client_version(), (1, 7))

    # Load SVN log starting from source_start_rev + 1
    it_log_entries = svnclient.iter_svn_log_entries(source_url, source_start_rev+1, source_end_rev, get_revprops=True)
    source_rev = None

    try:
        for log_entry in it_log_entries:
            if options.entries_proc_limit and num_entries_proc >= options.entries_proc_limit:
                break
            # Replay this revision from source_url into target_url
            disp_svn_log_summary(log_entry)
            source_rev = log_entry['revision']
            # Process all the changed-paths in this log entry
            commit_paths = []
            process_svn_log_entry(log_entry, options, commit_paths)
            num_entries_proc += 1
            # Commit any changes made to _wc_target
            target_revprops = gen_tracking_revprops(source_rev)   # Build source-tracking revprop's
            target_rev = commit_from_svn_log_entry(log_entry, options, commit_paths, target_revprops=target_revprops)
            if target_rev:
                # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
                set_rev_map(source_rev, target_rev)
                # Update our target working-copy, to ensure everything says it's at the new HEAD revision
                run_svn(["update"])
                commit_count += 1
                # Run "svn cleanup" every 100 commits if SVN 1.7+, to clean-up orphaned ".svn/pristines/*"
                if periodic_cleanup and (commit_count % 100 == 0):
                    run_svn(["cleanup"])
        if not source_rev:
            # If there were no new source_url revisions to process, init source_rev
            # for the "finally" message below.
            source_rev = source_end_rev

    except KeyboardInterrupt:
        print("\nStopped by user.")
        print("\nCleaning-up...")
        run_svn(["cleanup"])
        full_svn_revert()
    except:
        # Deliberately broad: whatever went wrong, report it and leave the
        # working copy clean.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        print("\nCleaning-up...")
        run_svn(["cleanup"])
        print(run_svn(["status"]))
        full_svn_revert()
    finally:
        print("\nFinished at source revision %s%s." % (source_rev, " (dry-run)" if options.dry_run else ""))
803
def main():
    """
    Command-line entry point; must be callable without arguments.

    Parses the command line with optparse, requires exactly two positional
    arguments (source_url and target_url), normalizes the verbosity and
    dry-run options, pushes the options into ui, and returns the result of
    real_main().
    """
    usage = "Usage: %prog [OPTIONS] source_url target_url"
    # NOTE(review): leading whitespace inside this literal could not be
    # recovered from the reviewed copy of the file -- confirm against the original.
    description = """\
Replicate (replay) history from one SVN repository to another. Maintain
logical ancestry wherever possible, so that 'svn log' on the replayed
repo will correctly follow file/folder renames.

== Examples ==
Create a copy of only /trunk from source repo, starting at r5000
$ svnadmin create /svn/target
$ svn mkdir -m 'Add trunk' file:///svn/target/trunk
$ svn2svn -av -r 5000 http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target
2. The first commit to http://server/source/trunk at/after r5000 will be
exported & added into _wc_target
3. All revisions affecting http://server/source/trunk (starting at r5000)
will be replayed to _wc_target. Any add/copy/move/replaces that are
copy-from'd some path outside of /trunk (e.g. files renamed on a
/branch and branch was merged into /trunk) will correctly maintain
logical ancestry where possible.

Use continue-mode (-c) to pick-up where the last run left-off
$ svn2svn -avc http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target, if not already
checked-out
2. All new revisions affecting http://server/source/trunk starting from
the last replayed revision to file:///svn/target/trunk (based on the
svn2svn:* revprops) will be replayed to _wc_target, maintaining all
logical ancestry where possible."""
    parser = optparse.OptionParser(usage, description=description,
                                   formatter=HelpFormatter(), version="%prog "+str(full_version))

    # (flags, add_option keyword arguments), in the order shown by --help
    option_table = [
        (("-r", "--revision"),
         dict(type="int", dest="svn_rev", metavar="REV",
              help="initial SVN revision to start source_url replay")),
        (("-a", "--keep-author"),
         dict(action="store_true", dest="keep_author", default=False,
              help="maintain original 'Author' info from source repo")),
        (("-c", "--continue"),
         dict(action="store_true", dest="cont_from_break",
              help="continue from previous break")),
        (("-x", "--verify"),
         dict(action="store_true", dest="verify", default=False,
              help="verify ancestry and content before target commits")),
        (("-l", "--limit"),
         dict(type="int", dest="entries_proc_limit", metavar="NUM",
              help="maximum number of log entries to process")),
        (("-n", "--dry-run"),
         dict(action="store_true", dest="dry_run", default=False,
              help="try processing next log entry but don't commit changes to "
                   "target working-copy (forces --limit=1)")),
        (("-v", "--verbose"),
         dict(dest="verbosity", action="count", default=1,
              help="enable additional output (use -vv or -vvv for more)")),
        (("--debug",),
         dict(dest="verbosity", const=ui.DEBUG, action="store_const",
              help="enable debugging output (same as -vvv)")),
    ]
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    options, args = parser.parse_args()
    if len(args) != 2:
        parser.error("incorrect number of arguments")
    if options.dry_run:
        # When in dry-run mode, only try to process the next log_entry
        options.entries_proc_limit = 1
    if options.verbosity < 10:
        # Expand multiple "-v" arguments to a real ui._level value
        options.verbosity = options.verbosity * 10
    ui.update_config(options)
    return real_main(options, args)
867
868
if __name__ == "__main__":
    # main() may return a falsy value (e.g. None); report that as exit status 0.
    exit_status = main() or 0
    sys.exit(exit_status)