]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/run/svn2svn.py
Misc code clean-up
[svn2svn.git] / svn2svn / run / svn2svn.py
1 """
2 Replicate (replay) changesets from one SVN repository to another.
3 """
4
5 from .. import base_version, full_version
6 from .. import ui
7 from .. import svnclient
8 from ..shell import run_svn
9 from ..errors import (ExternalCommandFailed, UnsupportedSVNAction, InternalError, VerificationError)
10 from parse import HelpFormatter
11
12 import sys
13 import os
14 import time
15 import traceback
16 import shutil
17 import operator
18 import optparse
19 from datetime import datetime
20
# The single-letter action codes, as reported by "svn log", which this module
# accepts: (M)odified, (A)dded, (R)eplaced, (D)eleted. Any other action code
# causes UnsupportedSVNAction to be raised by the replay functions below.
_valid_svn_actions = "MARD"
22
def commit_from_svn_log_entry(log_entry, options, commit_paths=None, target_revprops=None):
    """
    Run "svn commit" in the current working copy to replay one source log entry.

    The commit message is the source message with the source commit date
    appended. When options.keep_author is set the source author is passed via
    --username; otherwise the author is appended to the message as well.
    Revprops carried by 'log_entry', overlaid with any 'target_revprops', are
    attached using --with-revprop.

    'commit_paths' is an optional collection of changed paths; it is only
    passed to "svn commit" when it holds fewer than 100 entries, otherwise the
    commit runs against the working-copy root.

    Returns the revision scraped from the "svn commit" output. The result is
    None when options.dry_run is set or the command printed nothing, and ""
    when no "Committed revision" line was found.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # In -vv (or higher) mode, show the pending "svn status" of the working copy.
    if ui.get_level() >= ui.EXTRA:
        ui.status(">> commit_from_svn_log_entry: Pre-commit _wc_target status:", level=ui.EXTRA, color='CYAN')
        ui.status(run_svn(["status"]), level=ui.EXTRA, color='CYAN')

    # The commit time is rendered in the local timezone.
    # (For UTC commit times, use: svn_date = "%d 0" % timestamp)
    svn_date = str(datetime.fromtimestamp(int(log_entry['date'])))

    svn_args = ["commit", "--force-log"]
    if options.keep_author:
        svn_args.extend(["-m", log_entry['message'] + "\nDate: " + svn_date,
                         "--username", log_entry['author']])
    else:
        svn_args.extend(["-m", log_entry['message'] + "\nDate: " + svn_date
                         + "\nAuthor: " + log_entry['author']])

    # Source revprops first, then the extra target revprops so the latter win.
    merged_props = {}
    for prop_list in (log_entry['revprops'], target_revprops):
        for prop in prop_list or ():
            merged_props[prop['name']] = prop['value']
    for prop_name in merged_props:
        svn_args.extend(["--with-revprop", "%s=%s" % (prop_name, str(merged_props[prop_name]))])

    # Only name individual paths when the list is not excessively long.
    if commit_paths and len(commit_paths) < 100:
        svn_args.extend(list(commit_paths))

    if options.dry_run:
        return None

    # Run the "svn commit" command, and screen-scrape the target_rev value (if any)
    output = run_svn(svn_args)
    if not output:
        return None
    marker = 'Committed revision '
    committed = ""
    for text in output.strip("\n").split("\n"):
        if text.startswith(marker):
            committed = text[len(marker):].rstrip('.')
            break
    if committed:
        ui.status("Committed revision %s.", committed)
    return committed
74
def full_svn_revert():
    """
    Do an "svn revert" and proactively remove any extra files in the working copy.

    After reverting recursively from the current directory, every entry that
    "svn status" reports as unversioned (first column "?") is deleted from
    disk: files and symlinks are unlinked, directories are removed recursively.
    """
    run_svn(["revert", "--recursive", "."])
    output = run_svn(["status"])
    if not output:
        return
    for line in output.strip("\n").split("\n"):
        # "svn status" may print blank separator lines (e.g. around externals
        # or changelists); startswith() copes with those, line[0] would not.
        if not line.startswith("?"):
            continue
        path = line[4:].strip(" ")
        if os.path.islink(path) or os.path.isfile(path):
            # Unlink symlinks rather than following them: shutil.rmtree()
            # refuses to operate on a symbolic link to a directory.
            os.remove(path)
        elif os.path.isdir(path):
            shutil.rmtree(path)
90
def gen_tracking_revprops(source_repos_uuid, source_url, source_rev):
    """
    Build the list of svn2svn-specific source-tracking revprops.

    Returns three {'name': ..., 'value': ...} dicts, in order: the source
    repository UUID, the source URL and the source revision.
    """
    tracked = (
        ('svn2svn:source_uuid', source_repos_uuid),
        ('svn2svn:source_url', source_url),
        ('svn2svn:source_rev', source_rev),
    )
    return [{'name': prop_name, 'value': prop_value} for prop_name, prop_value in tracked]
99
def in_svn(p, require_in_repo=False, prefix=""):
    """
    Report whether the file/folder 'p' is tracked by Subversion.

    The answer is derived from the first "svn status" entry for the path
    (queried non-recursively) rather than from the presence of ".svn"
    directories, since SVN 1.7+ working copies only keep a single top-level
    ".svn" folder.

    With 'require_in_repo' set, a path that is only locally added (or has no
    revision) is reported as not tracked. Otherwise a path counts as tracked
    when it is not scheduled for deletion and is either of type "normal",
    locally added, or copied. Returns False when there is no status entry.
    """
    status_entries = svnclient.get_svn_status(p, no_recursive=True)
    if not status_entries:
        return False
    info = status_entries[0]
    if require_in_repo and (info['status'] == 'added' or info['revision'] is None):
        # Caller needs the path to exist in the repository itself, so a
        # locally-added path does not qualify.
        tracked = False
    elif info['status'] == 'deleted':
        # Paths scheduled for deletion are not considered under source-control.
        tracked = False
    else:
        # Locally added/copied paths do count as under source-control.
        tracked = (info['type'] == 'normal'
                   or info['status'] == 'added'
                   or info['copied'] == 'true')
    ui.status(prefix + ">> in_svn('%s', require_in_repo=%s) --> %s", p, str(require_in_repo), str(tracked), level=ui.DEBUG, color='GREEN')
    return tracked
121
def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix = ""):
    """
    Given a source path, walk the SVN history backwards to inspect the ancestry of
    that path, seeing if it traces back to base_path. Build an array of copyfrom_path
    and copyfrom_revision pairs for each of the "svn copies". If we find a copyfrom_path
    which base_path is a prefix of (e.g. we crawled back to the initial branch-
    copy from trunk), then return the collection of ancestor paths. Otherwise,
    the path has no ancestry compared to base_path.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'svn_repos_url' is the full URL to the root of the SVN repository,
      e.g. 'file:///path/to/repo'
    'base_path' is the path in the SVN repo to the target path we're trying to
      trace ancestry back to, e.g. 'trunk'.
    'source_path' is the path in the SVN repo to the source path to start checking
      ancestry at, e.g. 'branches/fix1/projectA/file1.txt'.
      (full_path = svn_repos_url+base_path+"/"+path_offset)
    'source_rev' is the revision to start walking the history of source_path backwards from.
    'prefix' is prepended to every debug status message (used for indenting nested output).

    Returns a list of {'path': ..., 'revision': ...} dicts. The first entry is
    base_path+"/"+source_path at source_rev and each following entry is the
    next copy-from hop. Returns an empty list when no ancestor chain was found.
    Raises UnsupportedSVNAction if a changed-path carries an unknown action code.
    """
    ui.status(prefix + ">> find_svn_ancestors: Start: (%s) source_path: %s base_path: %s",
        svn_repos_url, source_path+"@"+str(source_rev), base_path, level=ui.DEBUG, color='YELLOW')
    done = False
    working_path = base_path+"/"+source_path
    working_rev = source_rev
    first_iter_done = False
    # Copy-from hops collected so far; reset to [] whenever the chain dead-ends.
    ancestors_temp = []
    while not done:
        # Get the first "svn log" entry for this path (relative to @rev)
        ui.status(prefix + ">> find_svn_ancestors: %s", svn_repos_url + working_path+"@"+str(working_rev), level=ui.DEBUG, color='YELLOW')
        log_entry = svnclient.get_first_svn_log_entry(svn_repos_url + working_path+"@"+str(working_rev), 1, working_rev, True)
        if not log_entry:
            ui.status(prefix + ">> find_svn_ancestors: Done: no log_entry", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        # If we found a copy-from case which matches our base_path, we're done.
        # ...but only if we've at least tried to search for the first copy-from path.
        # (Any hops already collected in ancestors_temp are kept in this case.)
        if first_iter_done and working_path.startswith(base_path):
            ui.status(prefix + ">> find_svn_ancestors: Done: Found working_path.startswith(base_path) and first_iter_done=True", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        first_iter_done = True
        # Search for any actions on our target path (or parent paths).
        # NOTE(review): "path in working_path" is a substring test, not a
        # path-prefix test -- confirm that unrelated paths cannot match.
        changed_paths_temp = []
        for d in log_entry['changed_paths']:
            path = d['path']
            if path in working_path:
                changed_paths_temp.append({'path': path, 'data': d})
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the chain and this path has no ancestry back to base_path.
            ui.status(prefix + ">> find_svn_ancestors: Done: No matching changed_paths", level=ui.DEBUG, color='YELLOW')
            done = True
            # "continue" re-tests the loop condition, which is now false, so the loop ends.
            continue
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
        # Find the action for our working_path in this revision. Use a loop to check in reverse order,
        # so that we still find the copy-from if the target file/folder is "M" but has a parent
        # folder with an "A" copy-from.
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            action = d['action']
            if action not in _valid_svn_actions:
                raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                    % (log_entry['revision'], action))
            ui.status(prefix + "> %s %s%s", action, path,
                (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                level=ui.DEBUG, color='YELLOW')
            if action == 'D':
                # If file/folder was deleted, it has no ancestor
                ancestors_temp = []
                ui.status(prefix + ">> find_svn_ancestors: Done: deleted", level=ui.DEBUG, color='YELLOW')
                done = True
                break
            if action in 'RA':
                # If file/folder was added/replaced but not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    ancestors_temp = []
                    ui.status(prefix + ">> find_svn_ancestors: Done: %s with no copyfrom_path",
                        "Added" if action == "A" else "Replaced",
                        level=ui.DEBUG, color='YELLOW')
                    done = True
                    break
                # Else, file/folder was added/replaced and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                ui.status(prefix + ">> find_svn_ancestors: Found copy-from (action=%s): %s --> %s",
                    action, path, d['copyfrom_path']+"@"+str(d['copyfrom_revision']),
                    level=ui.DEBUG, color='YELLOW')
                ancestors_temp.append({'path': path, 'revision': log_entry['revision'],
                    'copyfrom_path': d['copyfrom_path'], 'copyfrom_rev': d['copyfrom_revision']})
                # NOTE(review): str.replace() rewrites every occurrence of d['path'] in
                # working_path, not only a leading one -- confirm this is intended.
                working_path = working_path.replace(d['path'], d['copyfrom_path'])
                working_rev = d['copyfrom_revision']
                # Follow the copy and keep on searching
                break
    # Convert the collected copy-from hops into the returned path@revision chain
    # by re-applying each hop's path substitution, starting from the original path.
    ancestors = []
    if ancestors_temp:
        ancestors.append({'path': base_path+"/"+source_path, 'revision': source_rev})
        working_path = base_path+"/"+source_path
        for idx in range(len(ancestors_temp)):
            d = ancestors_temp[idx]
            working_path = working_path.replace(d['path'], d['copyfrom_path'])
            working_rev = d['copyfrom_rev']
            ancestors.append({'path': working_path, 'revision': working_rev})
        if ui.get_level() >= ui.DEBUG:
            # Width of the longest "path@rev" label, used to align the debug columns.
            max_len = 0
            for idx in range(len(ancestors)):
                d = ancestors[idx]
                max_len = max(max_len, len(d['path']+"@"+str(d['revision'])))
            ui.status(prefix + ">> find_svn_ancestors: Found parent ancestors:", level=ui.DEBUG, color='YELLOW_B')
            for idx in range(len(ancestors)-1):
                d = ancestors[idx]
                d_next = ancestors[idx+1]
                ui.status(prefix + " [%s] %s <-- %s", idx,
                    str(d['path']+"@"+str(d['revision'])).ljust(max_len),
                    str(d_next['path']+"@"+str(d_next['revision'])).ljust(max_len),
                    level=ui.DEBUG, color='YELLOW')
    else:
        ui.status(prefix + ">> find_svn_ancestors: No ancestor-chain found: %s",
            svn_repos_url+base_path+"/"+source_path+"@"+str(source_rev), level=ui.DEBUG, color='YELLOW')
    return ancestors
243
def get_rev_map(rev_map, source_rev, prefix):
    """
    Look up the target-repo revision equivalent to a source-repo revision.

    Starting at 'source_rev' and counting down to 1, returns (as an int) the
    mapped value of the first revision number present in 'rev_map'. Returns
    None when no revision at or below 'source_rev' has been mapped.
    """
    ui.status(prefix + ">> get_rev_map(%s)", source_rev, level=ui.DEBUG, color='GREEN')
    # Walk downwards looking for the highest entry less-than-or-equal-to source_rev
    candidate = int(source_rev)
    while candidate > 0:
        is_mapped = candidate in rev_map
        ui.status(prefix + ">> get_rev_map: rev=%s in_rev_map=%s", candidate, str(is_mapped), level=ui.DEBUG, color='BLACK_B')
        if is_mapped:
            return int(rev_map[candidate])
        candidate -= 1
    # We fell off the bottom of the rev_map without finding anything.
    return None
256
def set_rev_map(rev_map, source_rev, target_rev):
    """
    Record in 'rev_map' that source revision 'source_rev' corresponds to
    target revision 'target_rev'. Both are stored as ints.
    """
    ui.status(">> set_rev_map: source_rev=%s target_rev=%s", source_rev, target_rev, level=ui.DEBUG, color='GREEN')
    mapped_rev = int(target_rev)
    rev_map[int(source_rev)] = mapped_rev
260
def build_rev_map(target_url, source_info):
    """
    Check for any already-replayed history from source_url (source_info) and
    build the mapping-table of source_rev -> target_rev.

    'target_url' is the URL in the target repository whose log is scanned
      from revision 1 to HEAD.
    'source_info' is a dict providing at least 'repos_uuid' and 'url' for
      the source repository.

    A target revision contributes to the map only when it carries all three
    svn2svn tracking revprops and its source UUID and source URL match
    'source_info'. Revisions without (complete) tracking revprops are skipped.
    Returns the resulting dict of int source_rev -> int target_rev.
    """
    required = ('svn2svn:source_uuid', 'svn2svn:source_url', 'svn2svn:source_rev')
    rev_map = {}
    ui.status("Rebuilding rev_map...", level=ui.VERBOSE)
    it_log_entries = svnclient.iter_svn_log_entries(target_url, 1, 'HEAD', get_changed_paths=False, get_revprops=True)
    for log_entry in it_log_entries:
        if not log_entry['revprops']:
            continue
        tracking = {}
        for v in log_entry['revprops']:
            if v['name'].startswith('svn2svn:'):
                tracking[v['name']] = v['value']
        # A revision may carry unrelated revprops only (i.e. it was not created
        # by svn2svn); skip it instead of failing on a missing tracking key.
        if not all(name in tracking for name in required):
            continue
        if tracking['svn2svn:source_uuid'] == source_info['repos_uuid'] and \
           tracking['svn2svn:source_url'] == source_info['url']:
            set_rev_map(rev_map, tracking['svn2svn:source_rev'], log_entry['revision'])
    return rev_map
282
def get_svn_dirlist(svn_path, svn_rev = ""):
    """
    Run "svn list" on the given folder path and return its output as a list
    with one element per output line. (No --recursive flag is passed.)

    When 'svn_rev' is given, it is used both as the operative revision ("-r")
    and as the peg revision ("path@rev"). The command is run with no_fail=True.
    An output of at most one character yields an empty list.
    """
    target = svn_path
    cmd = ["list"]
    if svn_rev:
        cmd.extend(["-r", svn_rev])
        target = target + "@" + str(svn_rev)
    cmd.append(target)
    listing = run_svn(cmd, no_fail=True)
    if len(listing) > 1:
        return listing.strip("\n").split("\n")
    return []
296
def path_in_list(paths, path):
    """
    Return True if 'path' starts with any of the entries in 'paths'
    (a plain string-prefix comparison), else False.
    """
    return any(path.startswith(known) for known in paths)
302
def add_path(paths, path):
    """
    Append 'path' to the list 'paths', unless path_in_list() reports that it
    is already covered by an existing entry.
    """
    if path_in_list(paths, path):
        return
    paths.append(path)
306
def do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev,
               parent_copyfrom_path="", parent_copyfrom_rev="", export_paths=None,
               rev_map=None, is_dir = False, prefix = ""):
    """
    Given the add'd source path, replay the "svn add/copy" commands to correctly
    track renames across copy-from's.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2  (rename folder)
    4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt  (rename file inside renamed folder)
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'path_offset' is the offset from source_base to the file to check ancestry for,
      e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'target_url' is the full URL to the target path in the target repository.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'parent_copyfrom_path' and 'parent_copyfrom_rev' is the copy-from path of the parent
      directory, when being called recursively by do_svn_add_dir().
    'export_paths' is the list of path_offset's that we've deferred running "svn export" on.
      It is appended to in place; when omitted, a fresh list is used.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s. When omitted, an empty map is used.
    'is_dir' is whether path_offset is a directory (rather than a file).
    'prefix' is prepended to debug status messages (used for indenting nested output).
    """
    # Never use mutable objects as default argument values: they are shared
    # across calls. (The previous default for export_paths was also a dict,
    # which cannot be appended to.)
    if export_paths is None:
        export_paths = []
    if rev_map is None:
        rev_map = {}
    source_base = source_url[len(source_repos_url):]
    ui.status(prefix + ">> do_svn_add: %s %s", source_base+"/"+path_offset+"@"+str(source_rev),
        " (parent-copyfrom: "+parent_copyfrom_path+"@"+str(parent_copyfrom_rev)+")" if parent_copyfrom_path else "",
        level=ui.DEBUG, color='GREEN')
    # Check if the given path has ancestors which chain back to the current source_base
    found_ancestor = False
    tgt_rev = None
    ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset, source_rev, prefix+" ")
    # ancestors[n] is the original (pre-branch-copy) trunk path.
    # ancestors[n-1] is the first commit on the new branch.
    copyfrom_path = ancestors[len(ancestors)-1]['path'] if ancestors else ""
    copyfrom_rev = ancestors[len(ancestors)-1]['revision'] if ancestors else ""
    if ancestors:
        # The copy-from path has ancestry back to source_url.
        ui.status(prefix + ">> do_svn_add: Check copy-from: Found parent: %s", copyfrom_path+"@"+str(copyfrom_rev),
            level=ui.DEBUG, color='GREEN', bold=True)
        found_ancestor = True
        # Map the copyfrom_rev (source repo) to the equivalent target repo rev #. This can
        # return None in the case where copyfrom_rev is *before* our source_start_rev.
        tgt_rev = get_rev_map(rev_map, copyfrom_rev, prefix+" ")
        ui.status(prefix + ">> do_svn_add: get_rev_map: %s (source) -> %s (target)", copyfrom_rev, tgt_rev, level=ui.DEBUG, color='GREEN')
    else:
        ui.status(prefix + ">> do_svn_add: Check copy-from: No ancestor chain found.", level=ui.DEBUG, color='GREEN')
        found_ancestor = False
    if found_ancestor and tgt_rev:
        # Check if this path_offset in the target WC already has this ancestry, in which
        # case there's no need to run the "svn copy" (again).
        path_in_svn = in_svn(path_offset, prefix=prefix+" ")
        log_entry = svnclient.get_last_svn_log_entry(path_offset, 1, 'HEAD', get_changed_paths=False) if in_svn(path_offset, require_in_repo=True, prefix=prefix+" ") else []
        if (not log_entry or (log_entry['revision'] != tgt_rev)):
            copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
            ui.status(prefix + ">> do_svn_add: svn_copy: Copy-from: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " copyfrom: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " p_copyfrom: %s", parent_copyfrom_path+"@"+str(parent_copyfrom_rev) if parent_copyfrom_path else "", level=ui.DEBUG, color='GREEN')
            if path_in_svn and \
               ((parent_copyfrom_path and copyfrom_path.startswith(parent_copyfrom_path)) and \
                (parent_copyfrom_rev and copyfrom_rev == parent_copyfrom_rev)):
                # When being called recursively, if this child entry has the same ancestor as
                # the parent, then no need to try to run another "svn copy".
                ui.status(prefix + ">> do_svn_add: svn_copy: Same ancestry as parent: %s",
                    parent_copyfrom_path+"@"+str(parent_copyfrom_rev),level=ui.DEBUG, color='GREEN')
            else:
                # Copy this path from the equivalent path+rev in the target repo, to create the
                # equivalent history.
                if parent_copyfrom_path:
                    # If we have a parent copy-from path, we mis-match that so display a status
                    # message describing the action we're mimic'ing. If path_in_svn, then this
                    # is logically a "replace" rather than an "add".
                    ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), source_base+"/"+path_offset, ancestors[1]['path']+"@"+str(copyfrom_rev), level=ui.VERBOSE)
                if path_in_svn:
                    # If local file is already under version-control, then this is a replace.
                    ui.status(prefix + ">> do_svn_add: pre-copy: local path already exists: %s", path_offset, level=ui.DEBUG, color='GREEN')
                    run_svn(["remove", "--force", path_offset])
                run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset])
                # Export the final version of this file/folder from the source repo, to make
                # sure we're up-to-date.
                add_path(export_paths, path_offset)
        else:
            ui.status(prefix + ">> do_svn_add: Skipped 'svn copy': %s", path_offset, level=ui.DEBUG, color='GREEN')
    else:
        # Else, either this copy-from path has no ancestry back to source_url OR copyfrom_rev comes
        # before our initial source_start_rev (i.e. tgt_rev == None), so can't do a "svn copy".
        # Create (parent) directory if needed.
        # TODO: This is (nearly) a duplicate of code in process_svn_log_entry(). Should this be
        #       split-out to a shared tag?
        p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or '.'
        if not os.path.exists(p_path):
            run_svn(["mkdir", p_path])
        if not in_svn(path_offset, prefix=prefix+" "):
            if is_dir:
                # Export the final version of all files in this folder.
                add_path(export_paths, path_offset)
            else:
                # Export the final version of this file. We *need* to do this before running
                # the "svn add", even if we end-up re-exporting this file again via export_paths.
                run_svn(["export", "--force", "-r", source_rev,
                         source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])
            # If not already under version-control, then "svn add" this file/folder.
            run_svn(["add", "--parents", path_offset])
        # TODO: Need to copy SVN properties from source repos
    if is_dir:
        # For any folders that we process, process any child contents, so that we correctly
        # replay copies/replaces/etc.
        do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url,
                       copyfrom_path, copyfrom_rev, export_paths, rev_map, prefix+" ")
424
def do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url,
                   parent_copyfrom_path, parent_copyfrom_rev, export_paths, rev_map, prefix=""):
    """
    Replay the contents of an added directory, one child entry at a time.

    Compares the "svn list" of the local working-copy folder 'path_offset'
    with the "svn list" of the same folder in the source repo at 'source_rev'.
    Every remote entry is handed to do_svn_add() (with this folder's copy-from
    info as the parent copy-from), and every local entry that is missing from
    the remote listing is removed with "svn remove --force".
    """
    source_base = source_url[len(source_repos_url):]
    # Gather both directory listings: local WC (target_url) vs. remote repo (source_url)
    # TODO: the local listing won't include add'd paths because "svn ls" lists the
    #       contents of the associated remote repo folder. (Is this a problem?)
    local_entries = get_svn_dirlist(path_offset)
    remote_entries = get_svn_dirlist(source_url+"/"+path_offset, source_rev)
    ui.status(prefix + ">> do_svn_add_dir: paths_local: %s", str(local_entries), level=ui.DEBUG, color='GREEN')
    ui.status(prefix + ">> do_svn_add_dir: paths_remote: %s", str(remote_entries), level=ui.DEBUG, color='GREEN')
    # Add/update everything which exists in the remote listing
    for entry in remote_entries:
        # Directories carry a trailing slash in the listing
        entry_is_dir = (entry[-1] == "/")
        if entry_is_dir:
            child_offset = path_offset + "/" + entry.rstrip('/')
        else:
            child_offset = path_offset + "/" + entry
        do_svn_add(source_repos_url, source_url, child_offset, target_url, source_rev,
                   parent_copyfrom_path, parent_copyfrom_rev, export_paths,
                   rev_map, entry_is_dir, prefix+" ")
    # Remove everything which exists locally but is gone from the remote listing
    for entry in local_entries:
        if entry in remote_entries:
            continue
        ui.status(" %s %s", 'D', source_base+"/"+path_offset+"/"+entry, level=ui.VERBOSE)
        run_svn(["remove", "--force", path_offset+"/"+entry])
        # TODO: Does this handle deleted folders too? Wouldn't want to have a case
        #       where we only delete all files from folder but leave orphaned folder around.
449
def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url,
                          rev_map, options, commit_paths=None, prefix = ""):
    """
    Process SVN changes from the given log entry.
    Returns array of all the paths in the working-copy that were changed,
    i.e. the paths which need to be "svn commit".

    'log_entry' is the array structure built by parse_svn_log_xml().
    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s.
    'options' is accepted for interface compatibility; it is not read here.
    'commit_paths' is the working list of specific paths which changes to pass
      to the final "svn commit". It is appended to in place and returned; when
      omitted, a fresh list is created for this call.
    'prefix' is prepended to debug status messages (used for indenting nested output).

    Raises UnsupportedSVNAction for an unknown action code in the log entry.
    """
    # A mutable default ([]) would be shared between calls, leaking the paths
    # of earlier revisions into later commits.
    if commit_paths is None:
        commit_paths = []
    export_paths = []
    # Get the relative offset of source_url based on source_repos_url
    # e.g. '/branches/bug123'
    source_base = source_url[len(source_repos_url):]
    source_rev = log_entry['revision']
    ui.status(prefix + ">> process_svn_log_entry: %s", source_url+"@"+str(source_rev), level=ui.DEBUG, color='GREEN')
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not path.startswith(source_base + "/"):
            # Ignore changed files that are not part of this subdir
            if path != source_base:
                ui.status(prefix + ">> process_svn_log_entry: Unrelated path: %s (base: %s)", path, source_base, level=ui.DEBUG, color='GREEN')
            continue
        assert len(d['kind'])>0
        path_is_dir = True if d['kind'] == 'dir' else False
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path
        action = d['action']
        if action not in _valid_svn_actions:
            raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                % (source_rev, action))
        ui.status(" %s %s%s", action, d['path'],
            (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
            level=ui.VERBOSE)

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        add_path(commit_paths, path_offset)

        # Special-handling for replace's
        if action == 'R':
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            # Handle cases where this "add" was a copy from another URL in the source repos
            if d['copyfrom_revision']:
                do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev,
                           "", "", export_paths, rev_map, path_is_dir, prefix+" ")
            # Else just "svn export" the files from the source repo and "svn add" them.
            else:
                # Create (parent) directory if needed
                p_path = path_offset if path_is_dir else os.path.dirname(path_offset).strip() or '.'
                if not os.path.exists(p_path):
                    run_svn(["mkdir", p_path])
                # Export the entire added tree.
                if path_is_dir:
                    # For directories, defer the (recursive) "svn export". Might have a
                    # situation in a branch merge where the entry in the svn-log is a
                    # non-copy-from'd "add" but there are child contents (that we haven't
                    # gotten to yet in log_entry) that are copy-from's. When we try do
                    # the "svn copy" later on in do_svn_add() for those copy-from'd paths,
                    # having pre-existing (svn-add'd) contents creates some trouble.
                    # Instead, just create the stub folders ("svn mkdir" above) and defer
                    # exporting the final file-state until the end.
                    add_path(export_paths, path_offset)
                else:
                    # Export the final version of this file. We *need* to do this before running
                    # the "svn add", even if we end-up re-exporting this file again via export_paths.
                    run_svn(["export", "--force", "-r", source_rev,
                             source_url+"/"+path_offset+"@"+str(source_rev), path_offset])
                if not in_svn(path_offset, prefix=prefix+" "):
                    # Need to use in_svn here to handle cases where client committed the parent
                    # folder and each indiv sub-folder.
                    run_svn(["add", "--parents", path_offset])
                # TODO: Need to copy SVN properties from source repos

        elif action == 'D':
            run_svn(["remove", "--force", path_offset])

        elif action == 'M':
            # TODO: Is "svn merge -c" correct here? Should this just be an "svn export" plus
            #       proplist updating?
            run_svn(["merge", "-c", source_rev, "--non-recursive",
                     "--non-interactive", "--accept=theirs-full",
                     source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

        else:
            raise InternalError("Internal Error: process_svn_log_entry: Unhandled 'action' value: '%s'"
                % action)

    # Export the final version of all add'd paths from source_url
    for path_offset in export_paths:
        run_svn(["export", "--force", "-r", source_rev,
                 source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

    return commit_paths
570
def disp_svn_log_summary(log_entry):
    """
    Print a short "svn log"-style summary of 'log_entry': a blank line, a
    "rREV | author | date" header (date rendered via datetime.fromtimestamp),
    the commit message and a separator rule.
    """
    separator = "------------------------------------------------------------------------"
    ui.status("")
    revision = log_entry['revision']
    author = log_entry['author']
    committed_at = datetime.fromtimestamp(int(log_entry['date']))
    ui.status("r%s | %s | %s", revision, author, str(committed_at.isoformat(' ')))
    ui.status(log_entry['message'])
    ui.status(separator)
579
580 def real_main(options, args):
581 source_url = args.pop(0).rstrip("/")
582 target_url = args.pop(0).rstrip("/")
583 ui.status("options: %s", str(options), level=ui.DEBUG, color='GREEN')
584
585 # Make sure that both the source and target URL's are valid
586 source_info = svnclient.get_svn_info(source_url)
587 assert source_url.startswith(source_info['repos_url'])
588 target_info = svnclient.get_svn_info(target_url)
589 assert target_url.startswith(target_info['repos_url'])
590
591 source_end_rev = source_info['revision'] # Get the last revision # for the source repo
592 source_repos_url = source_info['repos_url'] # Get the base URL for the source repo, e.g. 'svn://svn.example.com/svn/repo'
593 source_repos_uuid = source_info['repos_uuid'] # Get the UUID for the source repo
594
595 wc_target = os.path.abspath('_wc_target')
596 rev_map = {}
597 num_entries_proc = 0
598 commit_count = 0
599 source_rev = None
600 target_rev = None
601
602 # Check out a working copy of target_url if needed
603 wc_exists = os.path.exists(wc_target)
604 if wc_exists and not options.cont_from_break:
605 shutil.rmtree(wc_target)
606 wc_exists = False
607 if not wc_exists:
608 svnclient.svn_checkout(target_url, wc_target)
609 os.chdir(wc_target)
610
611 if not options.cont_from_break:
612 # TODO: Warn user if trying to start (non-continue) into a non-empty target path?
613 # Get log entry for the SVN revision we will check out
614 if options.svn_rev:
615 # If specify a rev, get log entry just before or at rev
616 source_start_log = svnclient.get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
617 else:
618 # Otherwise, get log entry of branch creation
619 # Note: Trying to use svnclient.get_first_svn_log_entry(source_url, 1, source_end_rev, False)
620 # ends-up being *VERY* time-consuming on a repo with lots of revisions. Even though
621 # the "svn log" call is passing --limit 1, it seems like that limit-filter is happening
622 # _after_ svn has fetched the full log history. Instead, search the history in chunks
623 # and write some progress to the screen.
624 ui.status("Searching for start source revision (%s)...", source_url, level=ui.VERBOSE)
625 rev = 1
626 chunk_size = 1000
627 done = False
628 while not done:
629 entries = svnclient.run_svn_log(source_url, rev, min(rev+chunk_size-1, target_info['revision']), 1, get_changed_paths=False)
630 if entries:
631 source_start_log = entries[0]
632 done = True
633 break
634 ui.status("...%s...", rev)
635 rev = rev+chunk_size
636 if rev > target_info['revision']:
637 done = True
638 if not source_start_log:
639 raise InternalError("Unable to find first revision for source_url: %s" % source_url)
640
641 # This is the revision we will start from for source_url
642 source_start_rev = source_rev = int(source_start_log['revision'])
643 ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE)
644
645 # For the initial commit to the target URL, export all the contents from
646 # the source URL at the start-revision.
647 paths = run_svn(["list", "-r", source_rev, source_url+"@"+str(source_rev)])
648 if len(paths)>1:
649 disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_url, source_rev, source_rev))
650 ui.status("(Initial import)", level=ui.VERBOSE)
651 paths = paths.strip("\n").split("\n")
652 for path_raw in paths:
653 # For each top-level file/folder...
654 if not path_raw:
655 continue
656 # Directories have a trailing slash in the "svn list" output
657 path_is_dir = True if path_raw[-1] == "/" else False
658 path = path_raw.rstrip('/') if path_is_dir else path_raw
659 if path_is_dir and not os.path.exists(path):
660 os.makedirs(path)
661 ui.status(" A %s", source_url[len(source_repos_url):]+"/"+path, level=ui.VERBOSE)
662 run_svn(["export", "--force", "-r" , source_rev, source_url+"/"+path+"@"+str(source_rev), path])
663 run_svn(["add", path])
664 num_entries_proc += 1
665 target_revprops = gen_tracking_revprops(source_repos_uuid, source_url, source_rev) # Build source-tracking revprop's
666 target_rev = commit_from_svn_log_entry(source_start_log, options, target_revprops=target_revprops)
667 if target_rev:
668 # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
669 set_rev_map(rev_map, source_rev, target_rev)
670 # Update our target working-copy, to ensure everything says it's at the new HEAD revision
671 run_svn(["update"])
672 commit_count += 1
673 else:
674 # Re-build the rev_map based on any already-replayed history in target_url
675 rev_map = build_rev_map(target_url, source_info)
676 if not rev_map:
677 raise RuntimeError("Called with continue-mode, but no already-replayed history found in target repo: %s" % target_url)
678 source_start_rev = int(max(rev_map, key=rev_map.get))
679 assert source_start_rev
680 ui.status("Continuing from source revision %s.", source_start_rev, level=ui.VERBOSE)
681
682 svn_vers_t = svnclient.get_svn_client_version()
683 svn_vers = float(".".join(map(str, svn_vers_t[0:2])))
684
685 # Load SVN log starting from source_start_rev + 1
686 it_log_entries = svnclient.iter_svn_log_entries(source_url, source_start_rev+1, source_end_rev, get_revprops=True)
687 source_rev = None
688
689 try:
690 for log_entry in it_log_entries:
691 if options.entries_proc_limit:
692 if num_entries_proc >= options.entries_proc_limit:
693 break
694 # Replay this revision from source_url into target_url
695 disp_svn_log_summary(log_entry)
696 source_rev = log_entry['revision']
697 # Process all the changed-paths in this log entry
698 commit_paths = []
699 process_svn_log_entry(log_entry, source_repos_url, source_url, target_url,
700 rev_map, options, commit_paths)
701 num_entries_proc += 1
702 # Commit any changes made to _wc_target
703 target_revprops = gen_tracking_revprops(source_repos_uuid, source_url, source_rev) # Build source-tracking revprop's
704 target_rev = commit_from_svn_log_entry(log_entry, options, commit_paths, target_revprops=target_revprops)
705 if target_rev:
706 # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
707 source_rev = log_entry['revision']
708 set_rev_map(rev_map, source_rev, target_rev)
709 # Update our target working-copy, to ensure everything says it's at the new HEAD revision
710 run_svn(["update"])
711 commit_count += 1
712 # Run "svn cleanup" every 100 commits if SVN 1.7+, to clean-up orphaned ".svn/pristines/*"
713 if svn_vers >= 1.7 and (commit_count % 100 == 0):
714 run_svn(["cleanup"])
715 if not source_rev:
716 # If there were no new source_url revisions to process, init source_rev
717 # for the "finally" message below.
718 source_rev = source_end_rev
719
720 except KeyboardInterrupt:
721 print "\nStopped by user."
722 print "\nCleaning-up..."
723 run_svn(["cleanup"])
724 full_svn_revert()
725 except:
726 print "\nCommand failed with following error:\n"
727 traceback.print_exc()
728 print "\nCleaning-up..."
729 run_svn(["cleanup"])
730 print run_svn(["status"])
731 full_svn_revert()
732 finally:
733 print "\nFinished at source revision %s%s." % (source_rev, " (dry-run)" if options.dry_run else "")
734
def main():
    """
    Command-line entry point; must remain callable without arguments.

    Builds the optparse parser, requires exactly two positional arguments
    (source_url and target_url), normalizes the verbosity and dry-run
    options, hands the options to ui.update_config() and then returns
    the result of real_main(options, args).
    """
    usage = "Usage: %prog [OPTIONS] source_url target_url"
    description = """\
Replicate (replay) history from one SVN repository to another. Maintain
logical ancestry wherever possible, so that 'svn log' on the replayed
repo will correctly follow file/folder renames.

== Examples ==
Create a copy of only /trunk from source repo, starting at r5000
$ svnadmin create /svn/target
$ svn mkdir -m 'Add trunk' file:///svn/target/trunk
$ svn2svn -av -r 5000 http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target
2. The first commit to http://server/source/trunk at/after r5000 will be
exported & added into _wc_target
3. All revisions affecting http://server/source/trunk (starting at r5000)
will be replayed to _wc_target. Any add/copy/move/replaces that are
copy-from'd some path outside of /trunk (e.g. files renamed on a
/branch and branch was merged into /trunk) will correctly maintain
logical ancestry where possible.

Use continue-mode (-c) to pick-up where the last run left-off
$ svn2svn -avc http://server/source/trunk file:///svn/target/trunk
1. The target_url will be checked-out to ./_wc_target, if not already
checked-out
2. All new revisions affecting http://server/source/trunk starting from
the last replayed revision to file:///svn/target/trunk (based on the
svn2svn:* revprops) will be replayed to _wc_target, maintaining all
logical ancestry where possible."""
    version_banner = "%prog " + str(full_version)
    cli = optparse.OptionParser(usage, description=description,
                                formatter=HelpFormatter(), version=version_banner)

    # Option table: (flags, add_option keyword settings), registered in the
    # same order they should be listed by --help.
    option_specs = (
        (("-r", "--revision"),
         dict(type="int", dest="svn_rev", metavar="REV",
              help="initial SVN revision to start source_url replay")),
        (("-a", "--keep-author"),
         dict(action="store_true", dest="keep_author", default=False,
              help="maintain original 'Author' info from source repo")),
        (("-c", "--continue"),
         dict(action="store_true", dest="cont_from_break",
              help="continue from previous break")),
        (("-l", "--limit"),
         dict(type="int", dest="entries_proc_limit", metavar="NUM",
              help="maximum number of log entries to process")),
        (("-n", "--dry-run"),
         dict(action="store_true", dest="dry_run", default=False,
              help="try processing next log entry but don't commit changes to "
                   "target working-copy (forces --limit=1)")),
        (("-v", "--verbose"),
         dict(dest="verbosity", action="count", default=1,
              help="enable additional output (use -vv or -vvv for more)")),
        (("--debug",),
         dict(dest="verbosity", const=ui.DEBUG, action="store_const",
              help="enable debugging output (same as -vvv)")),
    )
    for flags, settings in option_specs:
        cli.add_option(*flags, **settings)

    opts, positional = cli.parse_args()
    if len(positional) != 2:
        cli.error("incorrect number of arguments")

    # A small value is a "-v" count (default 1); scale it by 10 to get a
    # ui level. Values of 10 or more (presumably ui.DEBUG stored by
    # --debug) are left untouched.
    if opts.verbosity < 10:
        opts.verbosity = opts.verbosity * 10

    # Dry-run only ever attempts the next log entry, overriding any --limit.
    if opts.dry_run:
        opts.entries_proc_limit = 1

    ui.update_config(opts)
    return real_main(opts, positional)
796
797
if __name__ == "__main__":
    # Use main()'s return value as the exit status; a falsy result
    # (such as None) becomes 0.
    exit_status = main() or 0
    sys.exit(exit_status)