]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/run/svn2svn.py
WIP on verify
[svn2svn.git] / svn2svn / run / svn2svn.py
1 """ Replicate (replay) changesets from one SVN repository to another. """
2 from .. import base_version, full_version
3 from .. import ui
4 from .. import svnclient
5 from ..shell import run_svn
6 from ..errors import (ExternalCommandFailed, UnsupportedSVNAction, InternalError, VerificationError)
7 from parse import HelpFormatter
8
9 import sys
10 import os
11 import time
12 import traceback
13 import shutil
14 import operator
15 import optparse
16 from datetime import datetime
17
# Single-letter action codes accepted from "svn log" changed-path entries
# (M = modified, A = added, R = replaced, D = deleted). Any other code makes the
# replay code raise UnsupportedSVNAction.
_valid_svn_actions = "MARD" # The list of known SVN action abbr's, from "svn log"

# Module-level variables/parameters
# These are shared state read by the replay functions below. The source_* values
# and target_url are assigned by real_main(); rev_map is maintained through
# set_rev_map()/build_rev_map().
source_url = "" # URL to source path in source SVN repo, e.g. 'http://server/svn/source/trunk'
source_repos_url = "" # URL to root of source SVN repo, e.g. 'http://server/svn/source'
source_base = "" # Relative path of source_url in source SVN repo, e.g. '/trunk'
source_repos_uuid = "" # UUID of source SVN repo
target_url ="" # URL to target path in target SVN repo, e.g. 'file:///svn/repo_target/trunk'
target_repos_url = "" # URL to root of target SVN repo, e.g. 'file:///svn/repo_target'
target_base = "" # Relative path of target_url in target SVN repo, e.g. '/trunk'
rev_map = {} # The running mapping-table dictionary for source_url rev #'s -> target_url rev #'s
29
def commit_from_svn_log_entry(log_entry, options, commit_paths=None, target_revprops=None):
    """
    Run "svn commit" in the current working copy to replay one source revision.

    'log_entry' supplies the commit message, date, author and revprops.
    'options' is read for 'keep_author' (commit as the original author via
    --username instead of noting the author in the message) and 'dry_run'
    (build the command but do not run it).
    'commit_paths' is an optional list of changed paths to commit; it is only
    passed to "svn commit" when it has fewer than 100 entries.
    'target_revprops' is an optional list of {'name', 'value'} dicts to set on
    the new revision in addition to the source revision's revprops.

    Returns the committed revision as scraped from the command output: None for
    a dry-run or when the command printed nothing, "" when no
    "Committed revision" line was found.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # In -vv (or higher) mode, show the pending "svn status" of _wc_target first.
    if ui.get_level() >= ui.EXTRA:
        ui.status(">> commit_from_svn_log_entry: Pre-commit _wc_target status:", level=ui.EXTRA, color='CYAN')
        ui.status(run_svn(["status"]), level=ui.EXTRA, color='CYAN')

    # datetime.fromtimestamp() renders the commit time in the local timezone.
    # (For UTC commit times use instead: svn_date = "%d 0" % commit_time)
    commit_time = int(log_entry['date'])
    svn_date = str(datetime.fromtimestamp(commit_time))

    keep_author = options.keep_author
    message = log_entry['message'] + "\nDate: " + svn_date
    if keep_author:
        cmd = ["commit", "--force-log", "-m", message, "--username", log_entry['author']]
    else:
        cmd = ["commit", "--force-log", "-m", message + "\nAuthor: " + log_entry['author']]

    # Source revprops are carried forward first; the extra target revprops are
    # applied second so they win on a name clash.
    merged_revprops = {}
    for group in (log_entry['revprops'], target_revprops):
        if group:
            for prop in group:
                merged_revprops[prop['name']] = prop['value']
    for name in merged_revprops:
        cmd += ["--with-revprop", "%s=%s" % (name, str(merged_revprops[name]))]

    # Only name individual paths when the list is reasonably short; otherwise
    # pass nothing so the commit happens at the root of the working copy.
    if commit_paths and len(commit_paths) < 100:
        cmd += list(commit_paths)

    if options.dry_run:
        return None

    # Run the "svn commit" command and screen-scrape the new revision number.
    output = run_svn(cmd)
    if not output:
        return None
    marker = 'Committed revision '
    rev = ""
    for line in output.strip("\n").split("\n"):
        if line.startswith(marker):
            rev = line[len(marker):].rstrip('.')
            break
    if rev:
        ui.status("Committed revision %s.", rev)
    return rev
81
def full_svn_revert():
    """
    Do an "svn revert" and proactively remove any extra files in the working copy.

    After the recursive revert, every path that "svn status" still reports as
    unversioned ("?" in the first column) is deleted from disk: files and
    symbolic links are unlinked, real directories are removed recursively.
    """
    run_svn(["revert", "--recursive", "."])
    output = run_svn(["status"])
    if not output:
        return
    for line in output.strip("\n").split("\n"):
        # startswith() (rather than line[0]) keeps an empty line from raising.
        if not line.startswith("?"):
            continue
        path = line[4:].strip(" ")
        # Unlink symlinks explicitly: shutil.rmtree() refuses to operate on a
        # symbolic link to a directory, and a dangling link is neither a file
        # nor a directory as far as os.path is concerned.
        if os.path.islink(path) or os.path.isfile(path):
            os.remove(path)
        elif os.path.isdir(path):
            shutil.rmtree(path)
97
def gen_tracking_revprops(source_rev):
    """
    Build the list of svn2svn-specific source-tracking revprops for 'source_rev'.

    Each item is a {'name', 'value'} dict; the UUID and URL values come from the
    module-level source_repos_uuid and source_url.
    """
    tracking = (
        ('svn2svn:source_uuid', source_repos_uuid),
        ('svn2svn:source_url', source_url),
        ('svn2svn:source_rev', source_rev),
    )
    return [{'name': name, 'value': value} for name, value in tracking]
106
def in_svn(p, require_in_repo=False, prefix=""):
    """
    Check if a given file/folder is being tracked by Subversion.

    Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
    With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of
    the working-copy, so "svn status" is used to check the state of the path instead.

    'p' is the path to check.
    'require_in_repo' makes locally-added paths (status 'added', or no revision
    yet) count as not tracked.
    'prefix' is prepended to the debug status message.

    Returns False when "svn status" yields no entry for the path.
    """
    entries = svnclient.get_svn_status(p, no_recursive=True)
    if not entries:
        return False
    entry = entries[0]
    if require_in_repo and (entry['status'] == 'added' or entry['revision'] is None):
        # Caller needs the path to exist in the repository, so a path that is
        # only locally-added does not qualify.
        tracked = False
    elif entry['status'] == 'deleted':
        # Paths scheduled for deletion in the WC are not under source-control.
        tracked = False
    else:
        # Normal entries and locally added/copied paths are under source-control.
        tracked = (entry['type'] == 'normal' or entry['status'] == 'added'
                   or entry['copied'] == 'true')
    ui.status(prefix + ">> in_svn('%s', require_in_repo=%s) --> %s", p, str(require_in_repo), str(tracked), level=ui.DEBUG, color='GREEN')
    return tracked
128
def find_svn_ancestors(svn_url_or_wc, base_path, source_path, source_rev, prefix = ""):
    """
    Given a source path, walk the SVN history backwards to inspect the ancestry of
    that path, seeing if it traces back to base_path. Build an array of copyfrom_path
    and copyfrom_revision pairs for each of the "svn copies". If we find a copyfrom_path
    which starts with base_path (e.g. we crawled back to the initial branch-copy from
    trunk), then return the collection of ancestor paths. Otherwise, the path has no
    ancestry compared to base_path.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'svn_url_or_wc' is passed to svnclient.get_svn_info(); the repository root URL
      and whether this is a working-copy are taken from the returned info.
    'base_path' is the path in the SVN repo to the target path we're trying to
      trace ancestry back to, e.g. '/trunk'.
    'source_path' is the offset below base_path of the path to start checking
      ancestry at, e.g. 'projectA/file1.txt'. The walk starts at
      base_path+"/"+source_path.
    'source_rev' is the revision to start walking the history of source_path backwards from.
    'prefix' is prepended to every debug status message.

    Returns a list of {'path', 'revision'} dicts. The first item is the starting
    path at source_rev; each following item is the path/revision reached after
    following one more copy. The list is empty when no copy was recorded, or when
    the walk hit a delete or an add/replace without copy-from info.

    Raises UnsupportedSVNAction for a changed-path action outside _valid_svn_actions.
    """
    info = svnclient.get_svn_info(svn_url_or_wc)
    svn_repos_url = info['repos_url']
    is_wc = True if info['is_wc'] else False
    ui.status(prefix + ">> find_svn_ancestors: Start (url=%s): source_path: %s, base_path: %s, is_wc: %s",
        info['url'], source_path+"@"+str(source_rev), base_path, str(is_wc), level=ui.DEBUG, color='YELLOW')
    # Init for first iteration
    working_path = base_path+"/"+source_path
    working_rev = source_rev
    done = False
    first_iter = True
    ancestors_temp = []   # One entry per copy followed, in the order they were found
    while not done:
        if is_wc and first_iter:
            # If checking ancestry for a working-copy, need to use "svn info" to detect copy-from info.
            # NOTE(review): this branch is an unfinished debugging stub -- it dumps the
            # "svn info" result and terminates the whole process with exit status 1.
            info = svnclient.get_svn_info(source_path)
            print info
            sys.exit(1)
        else:
            # Get the first "svn log" entry for this path (relative to @rev)
            # NOTE(review): search_path is only used in the status message below; the
            # log query itself always uses the repository-URL form, even when is_wc.
            search_path = working_path if is_wc else (svn_repos_url + working_path+"@"+str(working_rev))
            ui.status(prefix + ">> find_svn_ancestors: %s", search_path, level=ui.DEBUG, color='YELLOW')
            log_entry = svnclient.get_first_svn_log_entry(svn_repos_url + working_path+"@"+str(working_rev), 1, working_rev, True)
        if not log_entry:
            ui.status(prefix + ">> find_svn_ancestors: Done: no log_entry", level=ui.DEBUG, color='YELLOW')
            done = True
            continue
        # If we found a copy-from case which matches our base_path, we're done.
        # ...but only if we've at least tried to search for the first copy-from path.
        if (not first_iter) and working_path.startswith(base_path):
            ui.status(prefix + ">> find_svn_ancestors: Done: Found working_path.startswith(base_path) and first_iter=False", level=ui.DEBUG, color='YELLOW')
            done = True
            continue
        first_iter = False
        # Search for any actions on our target path (or parent paths).
        # NOTE(review): "path in working_path" is a substring test, not a
        # path-prefix test -- confirm that is intended.
        changed_paths_temp = []
        for d in log_entry['changed_paths']:
            path = d['path']
            if path in working_path:
                changed_paths_temp.append({'path': path, 'data': d})
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the chain.
            # (Any copies already collected in ancestors_temp are kept.)
            ui.status(prefix + ">> find_svn_ancestors: Done: No matching changed_paths", level=ui.DEBUG, color='YELLOW')
            done = True
            continue
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
        # Find the action for our working_path in this revision. Use a loop to check in reverse order,
        # so that we handle the case where the target file/folder is "M" but has a parent folder
        # with an "A" copy-from.
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            action = d['action']
            if action not in _valid_svn_actions:
                raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                    % (log_entry['revision'], action))
            ui.status(prefix + "> %s %s%s", action, path,
                (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                level=ui.DEBUG, color='YELLOW')
            if action == 'D':
                # If file/folder was deleted, it has no ancestor
                ancestors_temp = []
                ui.status(prefix + ">> find_svn_ancestors: Done: deleted", level=ui.DEBUG, color='YELLOW')
                done = True
                break
            if action in 'RA':
                # If file/folder was added/replaced but not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    ancestors_temp = []
                    ui.status(prefix + ">> find_svn_ancestors: Done: %s with no copyfrom_path",
                        "Added" if action == "A" else "Replaced",
                        level=ui.DEBUG, color='YELLOW')
                    done = True
                    break
                # Else, file/folder was added/replaced and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                ui.status(prefix + ">> find_svn_ancestors: Found copy-from (action=%s): %s --> %s",
                    action, path, d['copyfrom_path']+"@"+str(d['copyfrom_revision']),
                    level=ui.DEBUG, color='YELLOW')
                ancestors_temp.append({'path': path, 'revision': log_entry['revision'],
                    'copyfrom_path': d['copyfrom_path'], 'copyfrom_rev': d['copyfrom_revision']})
                # Rewrite the working path onto the copy source and continue from there.
                working_path = working_path.replace(d['path'], d['copyfrom_path'])
                working_rev = d['copyfrom_revision']
                # Follow the copy and keep on searching
                break
    # Turn the collected copies into the ancestor chain that is returned: start at
    # the original path and re-apply each copy's path substitution in order.
    ancestors = []
    if ancestors_temp:
        ancestors.append({'path': base_path+"/"+source_path, 'revision': source_rev})
        working_path = base_path+"/"+source_path
        for idx in range(len(ancestors_temp)):
            d = ancestors_temp[idx]
            working_path = working_path.replace(d['path'], d['copyfrom_path'])
            working_rev = d['copyfrom_rev']
            ancestors.append({'path': working_path, 'revision': working_rev})
        if ui.get_level() >= ui.DEBUG:
            # Pretty-print the chain as aligned "child <-- parent" pairs.
            max_len = 0
            for idx in range(len(ancestors)):
                d = ancestors[idx]
                max_len = max(max_len, len(d['path']+"@"+str(d['revision'])))
            ui.status(prefix + ">> find_svn_ancestors: Found parent ancestors:", level=ui.DEBUG, color='YELLOW_B')
            for idx in range(len(ancestors)-1):
                d = ancestors[idx]
                d_next = ancestors[idx+1]
                ui.status(prefix + " [%s] %s <-- %s", idx,
                    str(d['path']+"@"+str(d['revision'])).ljust(max_len),
                    str(d_next['path']+"@"+str(d_next['revision'])).ljust(max_len),
                    level=ui.DEBUG, color='YELLOW')
    else:
        ui.status(prefix + ">> find_svn_ancestors: No ancestor-chain found: %s",
            svn_repos_url+base_path+"/"+source_path+"@"+str(source_rev), level=ui.DEBUG, color='YELLOW')
    return ancestors
261
def get_rev_map(source_rev, prefix):
    """
    Find the equivalent rev # in the target repo for the given rev # from the source repo.

    Looks for the highest rev_map entry that is less-than-or-equal-to
    'source_rev' and returns its target revision as an int, or None when no
    such entry exists. 'prefix' is prepended to the debug status messages.
    """
    ui.status(prefix + ">> get_rev_map(%s)", source_rev, level=ui.DEBUG, color='GREEN')
    candidate = int(source_rev)
    # Step downwards from source_rev to revision 1 until a mapped revision is hit.
    while candidate > 0:
        ui.status(prefix + ">> get_rev_map: rev=%s in_rev_map=%s", candidate, str(candidate in rev_map), level=ui.DEBUG, color='BLACK_B')
        if candidate in rev_map:
            return int(rev_map[candidate])
        candidate -= 1
    # Else, we fell off the bottom of the rev_map. Ruh-roh...
    return None
274
def set_rev_map(source_rev, target_rev):
    """
    Record in rev_map that 'source_rev' (source repo) was replayed as 'target_rev'
    (target repo). Both values are stored as ints.
    """
    ui.status(">> set_rev_map: source_rev=%s target_rev=%s", source_rev, target_rev, level=ui.DEBUG, color='GREEN')
    # The module-level dict is mutated in place, so no "global" statement is needed.
    mapped_rev = int(target_rev)
    rev_map[int(source_rev)] = mapped_rev
279
def build_rev_map(target_url, source_info):
    """
    Check for any already-replayed history from source_url (source_info) and
    build the mapping-table of source_rev -> target_rev.

    'target_url' is the target URL whose log (revision 1 to HEAD) is scanned.
    'source_info' supplies the 'repos_uuid' and 'url' that a target revision's
    svn2svn tracking revprops must match for the revision to be mapped.

    The module-level rev_map is reset first. Target revisions which carry no
    revprops, or which lack any of the three svn2svn tracking revprops (e.g.
    commits that were made to the target by hand), are skipped.
    """
    global rev_map
    rev_map = {}
    ui.status("Rebuilding rev_map...", level=ui.VERBOSE)
    required = ('svn2svn:source_uuid', 'svn2svn:source_url', 'svn2svn:source_rev')
    it_log_entries = svnclient.iter_svn_log_entries(target_url, 1, 'HEAD', get_changed_paths=False, get_revprops=True)
    for log_entry in it_log_entries:
        if not log_entry['revprops']:
            continue
        revprops = dict((v['name'], v['value']) for v in log_entry['revprops']
                        if v['name'].startswith('svn2svn:'))
        # A revision may have revprops without being an svn2svn commit; indexing
        # the tracking keys directly would raise KeyError for those.
        if not all(name in revprops for name in required):
            continue
        if revprops['svn2svn:source_uuid'] == source_info['repos_uuid'] and \
           revprops['svn2svn:source_url'] == source_info['url']:
            set_rev_map(revprops['svn2svn:source_rev'], log_entry['revision'])
301
def get_svn_dirlist(svn_path, svn_rev = ""):
    """
    Get the entries which "svn list" reports for the given folder path.

    'svn_path' is the path/URL to list. When 'svn_rev' is given it is used both
    as the operative revision ("-r") and as the peg revision ("@rev").

    Returns the output lines as a list, or an empty list when the command
    printed (next to) nothing. The command is run with no_fail=True.
    """
    target = svn_path
    cmd = ["list"]
    if svn_rev:
        cmd.extend(["-r", svn_rev])
        target = target + "@" + str(svn_rev)
    cmd.append(target)
    listing = run_svn(cmd, no_fail=True)
    if len(listing) > 1:
        return listing.strip("\n").split("\n")
    return []
315
def path_in_list(paths, path):
    """
    Check whether 'path' is already covered by one of the entries in 'paths'.

    An entry covers 'path' when it is the same path or one of its parent
    folders. The comparison is done on whole path components, so 'proj1' covers
    'proj1/file.txt' but not 'proj10/file.txt'. An empty entry stands for the
    root and covers every path.
    """
    for p in paths:
        if not p or path == p:
            return True
        # Compare against "<entry>/" so a sibling that merely shares a name
        # prefix is not mistaken for a child.
        if path.startswith(p.rstrip("/") + "/"):
            return True
    return False
321
def add_path(paths, path):
    """
    Append 'path' to the 'paths' list, unless path_in_list() reports that the
    list already covers it.
    """
    if path_in_list(paths, path):
        return
    paths.append(path)
325
def do_svn_add(path_offset, source_rev, parent_copyfrom_path="", parent_copyfrom_rev="",
               export_paths=None, is_dir = False, prefix = ""):
    """
    Given the add'd source path, replay the "svn add/copy" commands to correctly
    track renames across copy-from's.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2  " Rename folder
    4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt  " Rename file inside renamed folder
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'path_offset' is the offset from source_base to the file to check ancestry for,
      e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'parent_copyfrom_path' and 'parent_copyfrom_rev' is the copy-from path of the parent
      directory, when being called recursively by do_svn_add_dir().
    'export_paths' is the list of path_offset's that we've deferred running "svn export" on.
      Paths are appended to it in place; when omitted a fresh list is used.
    'is_dir' is whether path_offset is a directory (rather than a file).
    'prefix' is prepended to the debug status messages.
    """
    # The deferred-export collection is appended to, so it must be a list; never
    # share one default container between calls.
    if export_paths is None:
        export_paths = []
    ui.status(prefix + ">> do_svn_add: %s %s", source_base+"/"+path_offset+"@"+str(source_rev),
        " (parent-copyfrom: "+parent_copyfrom_path+"@"+str(parent_copyfrom_rev)+")" if parent_copyfrom_path else "",
        level=ui.DEBUG, color='GREEN')
    # Check if the given path has ancestors which chain back to the current source_base
    ancestors = find_svn_ancestors(source_url, source_base, path_offset, source_rev, prefix+" ")
    # The last item of the chain is the oldest ancestor found, i.e. the path to copy from.
    copyfrom_path = ancestors[-1]['path'] if ancestors else ""
    copyfrom_rev = ancestors[-1]['revision'] if ancestors else ""
    tgt_rev = None
    if ancestors:
        # The copy-from path has ancestry back to source_url.
        ui.status(prefix + ">> do_svn_add: Check copy-from: Found parent: %s", copyfrom_path+"@"+str(copyfrom_rev),
            level=ui.DEBUG, color='GREEN', bold=True)
        # Map the copyfrom_rev (source repo) to the equivalent target repo rev #. This can
        # return None in the case where copyfrom_rev is *before* our source_start_rev.
        tgt_rev = get_rev_map(copyfrom_rev, prefix+" ")
        ui.status(prefix + ">> do_svn_add: get_rev_map: %s (source) -> %s (target)", copyfrom_rev, tgt_rev, level=ui.DEBUG, color='GREEN')
    else:
        ui.status(prefix + ">> do_svn_add: Check copy-from: No ancestor chain found.", level=ui.DEBUG, color='GREEN')
    if ancestors and tgt_rev:
        # Check if this path_offset in the target WC already has this ancestry, in which
        # case there's no need to run the "svn copy" (again).
        path_in_svn = in_svn(path_offset, prefix=prefix+" ")
        log_entry = svnclient.get_last_svn_log_entry(path_offset, 1, 'HEAD', get_changed_paths=False) if in_svn(path_offset, require_in_repo=True, prefix=prefix+" ") else []
        if (not log_entry or (log_entry['revision'] != tgt_rev)):
            copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
            ui.status(prefix + ">> do_svn_add: svn_copy: Copy-from: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " copyfrom: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " p_copyfrom: %s", parent_copyfrom_path+"@"+str(parent_copyfrom_rev) if parent_copyfrom_path else "", level=ui.DEBUG, color='GREEN')
            if path_in_svn and \
               ((parent_copyfrom_path and copyfrom_path.startswith(parent_copyfrom_path)) and \
                (parent_copyfrom_rev and copyfrom_rev == parent_copyfrom_rev)):
                # When being called recursively, if this child entry has the same ancestor as
                # the parent, then no need to try to run another "svn copy".
                ui.status(prefix + ">> do_svn_add: svn_copy: Same ancestry as parent: %s",
                    parent_copyfrom_path+"@"+str(parent_copyfrom_rev),level=ui.DEBUG, color='GREEN')
            else:
                # Copy this path from the equivalent path+rev in the target repo, to create the
                # equivalent history.
                if parent_copyfrom_path:
                    # If we have a parent copy-from path, we mis-match that so display a status
                    # message describing the action we're mimic'ing. If path_in_svn, then this
                    # is logically a "replace" rather than an "add".
                    ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), source_base+"/"+path_offset, ancestors[1]['path']+"@"+str(copyfrom_rev), level=ui.VERBOSE)
                if path_in_svn:
                    # If local file is already under version-control, then this is a replace.
                    ui.status(prefix + ">> do_svn_add: pre-copy: local path already exists: %s", path_offset, level=ui.DEBUG, color='GREEN')
                    run_svn(["remove", "--force", path_offset])
                run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset])
                # Export the final version of this file/folder from the source repo, to make
                # sure we're up-to-date.
                add_path(export_paths, path_offset)
        else:
            ui.status(prefix + ">> do_svn_add: Skipped 'svn copy': %s", path_offset, level=ui.DEBUG, color='GREEN')
    else:
        # Else, either this copy-from path has no ancestry back to source_url OR copyfrom_rev comes
        # before our initial source_start_rev (i.e. tgt_rev == None), so can't do a "svn copy".
        # Create (parent) directory if needed.
        # TODO: This is (nearly) a duplicate of code in process_svn_log_entry(). Should this be
        #       split-out to a shared tag?
        p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or '.'
        if not os.path.exists(p_path):
            run_svn(["mkdir", p_path])
        if not in_svn(path_offset, prefix=prefix+" "):
            if is_dir:
                # Export the final version of all files in this folder.
                add_path(export_paths, path_offset)
            else:
                # Export the final version of this file. We *need* to do this before running
                # the "svn add", even if we end-up re-exporting this file again via export_paths.
                run_svn(["export", "--force", "-r", source_rev,
                         source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])
            # If not already under version-control, then "svn add" this file/folder.
            run_svn(["add", "--parents", path_offset])
        # TODO: Need to copy SVN properties from source repos
    if is_dir:
        # For any folders that we process, process any child contents, so that we correctly
        # replay copies/replaces/etc.
        do_svn_add_dir(path_offset, source_rev, copyfrom_path, copyfrom_rev, export_paths, prefix+" ")
435
def do_svn_add_dir(path_offset, source_rev, parent_copyfrom_path, parent_copyfrom_rev,
                   export_paths, prefix=""):
    """
    Replay the child contents of an add'd folder.

    Every entry listed for the folder in the source repo (at 'source_rev') is
    handed to do_svn_add(), and every entry which is listed for the local
    folder but not for the source folder is "svn remove"d.

    'path_offset' is the folder's offset from source_base.
    'parent_copyfrom_path'/'parent_copyfrom_rev' and 'export_paths' are passed
    through to do_svn_add(). 'prefix' is prepended to debug status messages.
    """
    # Get the directory contents, to compare between the local WC (target_url) vs. the remote repo (source_url)
    # TODO: local_entries won't include add'd paths because "svn ls" lists the contents of the
    #       associated remote repo folder. (Is this a problem?)
    local_entries = get_svn_dirlist(path_offset)
    remote_entries = get_svn_dirlist(source_url+"/"+path_offset, source_rev)
    ui.status(prefix + ">> do_svn_add_dir: paths_local: %s", str(local_entries), level=ui.DEBUG, color='GREEN')
    ui.status(prefix + ">> do_svn_add_dir: paths_remote: %s", str(remote_entries), level=ui.DEBUG, color='GREEN')
    child_prefix = prefix+" "
    # Update files/folders which exist in remote but not local
    for entry in remote_entries:
        # A trailing "/" in the listing marks a folder; drop it for the offset.
        entry_is_dir = (entry[-1] == "/")
        child_offset = path_offset + "/" + entry.rstrip('/')
        do_svn_add(child_offset, source_rev, parent_copyfrom_path, parent_copyfrom_rev,
                   export_paths, entry_is_dir, child_prefix)
    # Remove files/folders which exist in local but not remote
    stale_entries = [entry for entry in local_entries if entry not in remote_entries]
    for entry in stale_entries:
        ui.status(" %s %s", 'D', source_base+"/"+path_offset+"/"+entry, level=ui.VERBOSE)
        run_svn(["remove", "--force", path_offset+"/"+entry])
        # TODO: Does this handle deleted folders too? Wouldn't want to have a case
        #       where we only delete all files from folder but leave orphaned folder around.
458
def process_svn_log_entry(log_entry, options, commit_paths, prefix = ""):
    """
    Process SVN changes from the given log entry. Build an array (commit_paths)
    of the paths in the working-copy that were changed, i.e. the paths which
    we'll pass to "svn commit".

    'log_entry' is the source "svn log" entry to replay; its 'changed_paths'
      outside of source_base are ignored.
    'options' is read for 'verify': when set, the pending working-copy state is
      checked against the log entry after the replay.
    'commit_paths' is the list that the changed path offsets are added to, in place.
    'prefix' is prepended to the debug status messages.

    Raises UnsupportedSVNAction for an action outside _valid_svn_actions, and
    VerificationError when verification is enabled and finds a mismatch.
    """
    export_paths = []
    source_rev = log_entry['revision']
    ui.status(prefix + ">> process_svn_log_entry: %s", source_url+"@"+str(source_rev), level=ui.DEBUG, color='GREEN')
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not path.startswith(source_base + "/"):
            # Ignore changed files that are not part of this subdir
            if path != source_base:
                ui.status(prefix + ">> process_svn_log_entry: Unrelated path: %s (base: %s)", path, source_base, level=ui.DEBUG, color='GREEN')
            continue
        assert len(d['kind'])>0
        path_is_dir = True if d['kind'] == 'dir' else False
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path
        action = d['action']
        if action not in _valid_svn_actions:
            raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                % (source_rev, action))
        ui.status(" %s %s%s", action, d['path'],
            (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
            level=ui.VERBOSE)

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        add_path(commit_paths, path_offset)

        # Special-handling for replace's
        if action == 'R':
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            if d['copyfrom_revision']:
                # This "add" was a copy from another URL in the source repos.
                do_svn_add(path_offset, source_rev, "", "", export_paths, path_is_dir, prefix+" ")
            else:
                # Else just "svn export" the files from the source repo and "svn add" them.
                # Create (parent) directory if needed
                p_path = path_offset if path_is_dir else os.path.dirname(path_offset).strip() or '.'
                if not os.path.exists(p_path):
                    run_svn(["mkdir", p_path])
                # Export the entire added tree.
                if path_is_dir:
                    # For directories, defer the (recursive) "svn export". Might have a
                    # situation in a branch merge where the entry in the svn-log is a
                    # non-copy-from'd "add" but there are child contents (that we haven't
                    # gotten to yet in log_entry) that are copy-from's. When we try do
                    # the "svn copy" later on in do_svn_add() for those copy-from'd paths,
                    # having pre-existing (svn-add'd) contents creates some trouble.
                    # Instead, just create the stub folders ("svn mkdir" above) and defer
                    # exporting the final file-state until the end.
                    add_path(export_paths, path_offset)
                else:
                    # Export the final version of this file. We *need* to do this before running
                    # the "svn add", even if we end-up re-exporting this file again via export_paths.
                    run_svn(["export", "--force", "-r", source_rev,
                             source_url+"/"+path_offset+"@"+str(source_rev), path_offset])
                if not in_svn(path_offset, prefix=prefix+" "):
                    # Need to use in_svn here to handle cases where client committed the parent
                    # folder and each indiv sub-folder.
                    run_svn(["add", "--parents", path_offset])
                # TODO: Need to copy SVN properties from source repos

        elif action == 'D':
            run_svn(["remove", "--force", path_offset])

        elif action == 'M':
            # TODO: Is "svn merge -c" correct here? Should this just be an "svn export" plus
            #       proplist updating?
            run_svn(["merge", "-c", source_rev, "--non-recursive",
                     "--non-interactive", "--accept=theirs-full",
                     source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

        else:
            raise InternalError("Internal Error: process_svn_log_entry: Unhandled 'action' value: '%s'"
                % action)

    # Export the final version of all add'd paths from source_url
    for path_offset in export_paths:
        run_svn(["export", "--force", "-r", source_rev,
                 source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

    # Optionally verify that we replayed everything correctly
    if options.verify:
        _verify_svn_log_entry(log_entry, source_rev, prefix)


def _verify_svn_log_entry(log_entry, source_rev, prefix):
    """
    Check that the pending state of the working copy matches what 'log_entry' changed.

    For every changed path below source_base, the "svn status" of the matching
    working-copy path is compared with the logged action (scheduled delete /
    add / replace, copy-from ancestry). Raises VerificationError on the first
    mismatch.
    """
    wc_info = svnclient.get_svn_info(".")
    target_rev = wc_info['revision']
    for d in log_entry['changed_paths']:
        path = d['path']
        if not path.startswith(source_base + "/"):
            continue
        path_offset = path[len(source_base):].strip("/")
        action = d['action']
        assert len(d['kind'])>0
        path_is_dir = True if d['kind'] == 'dir' else False
        desc = " %s %s%s" % (action, d['path'], (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "")
        ui.status(prefix+">> process_svn_log_entry: Verify: "+desc, level=ui.DEBUG, color='GREEN')
        status = svnclient.get_svn_status(path_offset)[0]
        # Show the raw status entry only when debugging, via the ui layer.
        ui.status(prefix + ">> process_svn_log_entry: Verify: status: %s", str(status), level=ui.DEBUG, color='GREEN')
        if action not in _valid_svn_actions:
            continue
        if action == 'D':
            action_type = "delete"
            status_status = "deleted"
            if os.path.exists(path_offset) and not path_is_dir:
                # Don't complain if (root) directory still exists. Root deleted directory
                # will be removed during the commit.
                raise VerificationError("Path exists in _wc_target:\n%s" % \
                    (desc))
            # Verify that _wc_target tracks this as a deletion
            if not status['status'] == status_status:
                raise VerificationError("Path not scheduled for %s in _wc_target:\n%s" % \
                    (action_type, desc))
        if action in 'AR':
            action_type = ("add" if action == "A" else "replace")
            status_status = ("added" if action == "A" else "replaced")
            if not os.path.exists(path_offset):
                raise VerificationError("Path doesn't exist in _wc_target:\n%s" % \
                    (desc))
            # Verify that _wc_target tracks this as an add/replace
            if not status['status'] == status_status:
                raise VerificationError("Path not scheduled for %s in _wc_target:\n%s" % \
                    (action_type, desc))
            source_ancestors = find_svn_ancestors(source_url, source_base, \
                path_offset, source_rev, prefix+" ") if d['copyfrom_path'] \
                else []
            if not source_ancestors:
                # For copyfrom=False, check that local path doesn't have any ancestry
                if status['copied'] == 'true':
                    raise VerificationError("Path copy-from'd in _wc_target:\n%s" % \
                        (desc))
            else:
                # For copyfrom=True, need to recursively check all child contents.
                if status['copied'] != 'true':
                    raise VerificationError("Path not copy-from'd in _wc_target:\n%s" % \
                        (desc))
                target_ancestors = find_svn_ancestors(".", target_base, path_offset, 'HEAD', prefix+" ")
                # The last item of each chain is the oldest ancestor found.
                source_copyfrom_path = source_ancestors[-1]['path']
                source_copyfrom_offset = source_copyfrom_path[len(source_base):].strip('/')
                source_copyfrom_rev = source_ancestors[-1]['revision']
                target_copyfrom_path = target_ancestors[-1]['path']
                target_copyfrom_offset = target_copyfrom_path[len(target_base):].strip('/')
                target_copyfrom_rev = target_ancestors[-1]['revision']
                if source_copyfrom_offset != target_copyfrom_offset:
                    raise VerificationError("Path not copy-from'd same path_offset: source_copyfrom=%s, target_copyfrom=%s:\n%s" % \
                        (source_copyfrom_path+"@"+str(source_copyfrom_rev), target_copyfrom_path+"@"+str(target_copyfrom_rev), desc))
                # get_rev_map() takes (source_rev, prefix); it reads the module-level
                # rev_map itself.
                source_tgt_rev = get_rev_map(source_copyfrom_rev, prefix+" ")
                if source_tgt_rev != target_copyfrom_rev:
                    raise VerificationError("Path not copy-from'd same revision: get_rev_map(%s)=%s, target_rev=%s\n%s" % \
                        (source_copyfrom_rev, source_tgt_rev, target_rev, desc))
                # TODO: Recursively check all child contents
        if action == 'M':
            if not os.path.exists(path_offset):
                raise VerificationError(" %s %s" % (action, path_offset))
            # TODO: Is there anything to check for directories? Maybe properties?
            # TODO: For files, use md5sum to compare both the remote and local file?
647
def disp_svn_log_summary(log_entry):
    """
    Print a short "svn log"-style header for one log entry via ui.status:
    a blank line, a "r<rev> | <author> | <date>" line, the commit message,
    and a dashed separator line.

    log_entry -- mapping that provides the 'revision', 'author', 'date' and
                 'message' keys; 'date' must be convertible with int() and is
                 rendered through datetime.fromtimestamp().
    """
    ui.status("")
    # The timestamp is converted only after the blank line has been emitted,
    # so a bad 'date' value fails at the same point it always did.
    commit_time = datetime.fromtimestamp(int(log_entry['date']))
    commit_time_text = str(commit_time.isoformat(' '))
    ui.status("r%s | %s | %s", log_entry['revision'], log_entry['author'], commit_time_text)
    ui.status(log_entry['message'])
    ui.status("------------------------------------------------------------------------")
656
def real_main(options, args):
    """
    Replay history from the source URL into the target URL.

    Pops the source URL and then the target URL off the front of 'args',
    checks-out (or, in continue-mode, re-uses) the '_wc_target' working-copy
    under the current directory, chdir's into it, and then replays each
    source revision as a commit to the target.

    options -- parsed option values; this function reads cont_from_break,
               svn_rev, entries_proc_limit and dry_run, and passes the whole
               object through to the process/commit helpers.
    args    -- list holding [source_url, target_url]; consumed in-place.

    Raises InternalError if no starting source revision can be found, and
    RuntimeError if continue-mode is requested but no already-replayed
    history is found in the target. Errors raised while replaying revisions
    are printed, the working-copy is cleaned-up and reverted, and the
    function then returns normally (None).
    """
    global source_url, target_url, rev_map
    source_url = args.pop(0).rstrip("/")    # e.g. 'http://server/svn/source/trunk'
    target_url = args.pop(0).rstrip("/")    # e.g. 'file:///svn/target/trunk'
    ui.status("options: %s", str(options), level=ui.DEBUG, color='GREEN')

    # Make sure that both the source and target URL's are valid
    source_info = svnclient.get_svn_info(source_url)
    assert source_url.startswith(source_info['repos_url'])
    target_info = svnclient.get_svn_info(target_url)
    assert target_url.startswith(target_info['repos_url'])

    # Init global vars
    global source_repos_url, source_base, source_repos_uuid
    source_repos_url = source_info['repos_url']        # e.g. 'http://server/svn/source'
    source_base = source_url[len(source_repos_url):]   # e.g. '/trunk'
    source_repos_uuid = source_info['repos_uuid']

    source_end_rev = source_info['revision']           # Last revision # in the source repo
    wc_target = os.path.abspath('_wc_target')
    num_entries_proc = 0
    commit_count = 0
    source_rev = None
    target_rev = None

    # Check out a working copy of target_url if needed
    wc_exists = os.path.exists(wc_target)
    if wc_exists and not options.cont_from_break:
        # A fresh (non-continue) run always starts from a clean working-copy
        shutil.rmtree(wc_target)
        wc_exists = False
    if not wc_exists:
        svnclient.svn_checkout(target_url, wc_target)
    os.chdir(wc_target)

    if not options.cont_from_break:
        # TODO: Warn user if trying to start (non-continue) into a non-empty target path?
        # Get log entry for the SVN revision we will check out.
        # Pre-initialize so an unsuccessful search reaches the InternalError
        # below instead of failing with a NameError.
        source_start_log = None
        if options.svn_rev:
            # If specify a rev, get log entry just before or at rev
            source_start_log = svnclient.get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
        else:
            # Otherwise, get log entry of branch creation
            # Note: Trying to use svnclient.get_first_svn_log_entry(source_url, 1, source_end_rev, False)
            # ends-up being *VERY* time-consuming on a repo with lots of revisions. Even though
            # the "svn log" call is passing --limit 1, it seems like that limit-filter is happening
            # _after_ svn has fetched the full log history. Instead, search the history in chunks
            # and write some progress to the screen.
            ui.status("Searching for start source revision (%s)...", source_url, level=ui.VERBOSE)
            rev = 1
            chunk_size = 1000
            # The search walks the *source* history, so it is bounded by the
            # source repo's last revision (not the target repo's).
            # NOTE(review): assumes get_svn_info() reports 'revision' as an integer -- confirm.
            while rev <= source_end_rev:
                chunk_end = min(rev+chunk_size-1, source_end_rev)
                entries = svnclient.run_svn_log(source_url, rev, chunk_end, 1, get_changed_paths=False)
                if entries:
                    source_start_log = entries[0]
                    break
                ui.status("...%s...", rev)
                rev += chunk_size
        if not source_start_log:
            raise InternalError("Unable to find first revision for source_url: %s" % source_url)

        # This is the revision we will start from for source_url
        source_start_rev = source_rev = int(source_start_log['revision'])
        ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        paths = run_svn(["list", "-r", source_rev, source_url+"@"+str(source_rev)])
        if len(paths) > 1:
            disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_url, source_rev, source_rev))
            ui.status("(Initial import)", level=ui.VERBOSE)
            for path_raw in paths.strip("\n").split("\n"):
                # For each top-level file/folder...
                if not path_raw:
                    continue
                # Directories have a trailing slash in the "svn list" output
                path_is_dir = path_raw.endswith("/")
                path = path_raw.rstrip('/') if path_is_dir else path_raw
                if path_is_dir and not os.path.exists(path):
                    os.makedirs(path)
                ui.status(" A %s", source_base+"/"+path, level=ui.VERBOSE)
                run_svn(["export", "--force", "-r", source_rev, source_url+"/"+path+"@"+str(source_rev), path])
                run_svn(["add", path])
            # The whole initial import counts as one processed log entry
            num_entries_proc += 1
            target_revprops = gen_tracking_revprops(source_rev)   # Build source-tracking revprop's
            target_rev = commit_from_svn_log_entry(source_start_log, options, target_revprops=target_revprops)
            if target_rev:
                # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
                set_rev_map(source_rev, target_rev)
                # Update our target working-copy, to ensure everything says it's at the new HEAD revision
                run_svn(["update"])
                commit_count += 1
    else:
        # Re-build the rev_map based on any already-replayed history in target_url
        build_rev_map(target_url, source_info)
        if not rev_map:
            raise RuntimeError("Called with continue-mode, but no already-replayed history found in target repo: %s" % target_url)
        # Resume after the source revision that maps to the highest target revision
        source_start_rev = int(max(rev_map, key=rev_map.get))
        assert source_start_rev
        ui.status("Continuing from source revision %s.", source_start_rev, level=ui.VERBOSE)

    # Compare (major, minor) as a tuple of ints rather than as a float:
    # float("1.10") == 1.1, which would wrongly rank SVN 1.10+ below 1.7.
    # NOTE(review): assumes the version components are ints or numeric strings -- confirm.
    svn_vers_t = svnclient.get_svn_client_version()
    svn_is_17_or_newer = tuple(int(part) for part in svn_vers_t[0:2]) >= (1, 7)

    # Load SVN log starting from source_start_rev + 1
    it_log_entries = svnclient.iter_svn_log_entries(source_url, source_start_rev+1, source_end_rev, get_revprops=True)
    source_rev = None

    try:
        for log_entry in it_log_entries:
            if options.entries_proc_limit and num_entries_proc >= options.entries_proc_limit:
                break
            # Replay this revision from source_url into target_url
            disp_svn_log_summary(log_entry)
            source_rev = log_entry['revision']
            # Process all the changed-paths in this log entry
            commit_paths = []
            process_svn_log_entry(log_entry, options, commit_paths)
            num_entries_proc += 1
            # Commit any changes made to _wc_target
            target_revprops = gen_tracking_revprops(source_rev)   # Build source-tracking revprop's
            target_rev = commit_from_svn_log_entry(log_entry, options, commit_paths, target_revprops=target_revprops)
            if target_rev:
                # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
                set_rev_map(source_rev, target_rev)
                # Update our target working-copy, to ensure everything says it's at the new HEAD revision
                run_svn(["update"])
                commit_count += 1
                # Run "svn cleanup" every 100 commits if SVN 1.7+, to clean-up orphaned ".svn/pristines/*"
                if svn_is_17_or_newer and (commit_count % 100 == 0):
                    run_svn(["cleanup"])
        if not source_rev:
            # If there were no new source_url revisions to process, init source_rev
            # for the "finally" message below.
            source_rev = source_end_rev

    # Single-argument print(...) emits the same text whether it is parsed as
    # a Python 2 print statement or as a Python 3 function call.
    except KeyboardInterrupt:
        print("\nStopped by user.")
        print("\nCleaning-up...")
        run_svn(["cleanup"])
        full_svn_revert()
    except:
        # Deliberately broad: whatever went wrong, report it and leave the
        # working-copy reverted so a later continue-mode run starts clean.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        print("\nCleaning-up...")
        run_svn(["cleanup"])
        print(run_svn(["status"]))
        full_svn_revert()
    finally:
        print("\nFinished at source revision %s%s." % (source_rev, " (dry-run)" if options.dry_run else ""))
813
def main():
    """
    Command-line entry point; must be callable without arguments.

    Parses the command-line options with optparse, requires exactly two
    positional arguments (source_url and target_url), normalizes the
    verbosity and dry-run settings, pushes the options into the ui module
    and returns whatever real_main() returns.
    """
    usage_text = "Usage: %prog [OPTIONS] source_url target_url"
    help_text = """\
Replicate (replay) history from one SVN repository to another. Maintain
logical ancestry wherever possible, so that 'svn log' on the replayed
repo will correctly follow file/folder renames.

== Examples ==
Create a copy of only /trunk from source repo, starting at r5000
$ svnadmin create /svn/target
$ svn mkdir -m 'Add trunk' file:///svn/target/trunk
$ svn2svn -av -r 5000 http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target
2. The first commit to http://server/source/trunk at/after r5000 will be
exported & added into _wc_target
3. All revisions affecting http://server/source/trunk (starting at r5000)
will be replayed to _wc_target. Any add/copy/move/replaces that are
copy-from'd some path outside of /trunk (e.g. files renamed on a
/branch and branch was merged into /trunk) will correctly maintain
logical ancestry where possible.

Use continue-mode (-c) to pick-up where the last run left-off
$ svn2svn -avc http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target, if not already
checked-out
2. All new revisions affecting http://server/source/trunk starting from
the last replayed revision to file:///svn/target/trunk (based on the
svn2svn:* revprops) will be replayed to _wc_target, maintaining all
logical ancestry where possible."""
    opt_parser = optparse.OptionParser(
        usage_text,
        description=help_text,
        formatter=HelpFormatter(),
        version="%prog "+str(full_version))

    # (flags, settings) pairs, registered in this order so that the --help
    # listing keeps its layout.
    option_table = [
        (("-r", "--revision"),
         dict(type="int", dest="svn_rev", metavar="REV",
              help="initial SVN revision to start source_url replay")),
        (("-a", "--keep-author"),
         dict(action="store_true", dest="keep_author", default=False,
              help="maintain original 'Author' info from source repo")),
        (("-c", "--continue"),
         dict(action="store_true", dest="cont_from_break",
              help="continue from previous break")),
        (("-x", "--verify"),
         dict(action="store_true", dest="verify", default=False,
              help="verify ancestry and content before target commits")),
        (("-l", "--limit"),
         dict(type="int", dest="entries_proc_limit", metavar="NUM",
              help="maximum number of log entries to process")),
        (("-n", "--dry-run"),
         dict(action="store_true", dest="dry_run", default=False,
              help="try processing next log entry but don't commit changes to "
                   "target working-copy (forces --limit=1)")),
        (("-v", "--verbose"),
         dict(dest="verbosity", action="count", default=1,
              help="enable additional output (use -vv or -vvv for more)")),
        (("--debug",),
         dict(dest="verbosity", const=ui.DEBUG, action="store_const",
              help="enable debugging output (same as -vvv)")),
    ]
    for flags, settings in option_table:
        opt_parser.add_option(*flags, **settings)

    options, args = opt_parser.parse_args()
    if len(args) != 2:
        opt_parser.error("incorrect number of arguments")

    # Expand multiple "-v" arguments to a real ui._level value
    if options.verbosity < 10:
        options.verbosity = options.verbosity * 10

    # When in dry-run mode, only try to process the next log_entry
    if options.dry_run:
        options.entries_proc_limit = 1

    ui.update_config(options)
    return real_main(options, args)
877
878
if __name__ == "__main__":
    # main() may return None (or another falsy value); exit with status 0 then.
    exit_status = main() or 0
    sys.exit(exit_status)