]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn/run/svn2svn.py
Continue-mode support w/ rev_map
[svn2svn.git] / svn2svn / run / svn2svn.py
1 """
2 Replicate (replay) changesets from one SVN repository to another:
3 * Maintains full logical history (e.g. uses "svn copy" for renames).
4 * Maintains original commit messages.
5 * Optionally maintain source author info. (Only supported if accessing
6 target SVN repo via file://)
7 * Cannot maintain original commit date, but appends original commit date
8 for each commit message: "Date: %d".
9 * Optionally run an external shell script before each replayed commit
10 to give the ability to dynamically exclude or modify files as part
11 of the replay.
12
13 License: GPLv3, same as hgsvn (https://bitbucket.org/andialbrecht/hgsvn)
14 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
15 (Inspired by http://code.google.com/p/svn2svn/, and uses code for hgsvn
16 for SVN client handling)
17 """
18
19 from .. import base_version, full_version
20 from .. import ui
21 from .. import svnclient
22 from ..shell import run_svn
23 from ..errors import (ExternalCommandFailed, UnsupportedSVNAction)
24
25 import sys
26 import os
27 import time
28 import traceback
29 import shutil
30 import operator
31 from optparse import OptionParser,OptionGroup
32 from datetime import datetime
33
def commit_from_svn_log_entry(entry, files=None, keep_author=False, source_props=[]):
    """
    Run "svn commit" in the current working copy for the given SVN log entry.

    'entry' is the log-entry dict; its 'date', 'message' and 'author' values are used.
    'files' is an optional sequence of paths appended to the commit command.
    'keep_author' passes the entry's author via "--username" rather than
    appending an "Author:" line to the commit message.
    'source_props' is an optional (uuid, url, rev) sequence which is recorded
    as the svn2svn:source_uuid/source_url/source_rev revision properties.

    Returns the committed revision as a string, "" if the command produced
    output without a "Committed revision" line, or None if there was no output.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # The original commit time is rendered in the local timezone.
    # (If you prefer UTC commit times, use: svn_date = "%d 0" % timestamp)
    timestamp = int(entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    message = entry['message'] + "\nDate: " + svn_date
    if keep_author:
        options = ["commit", "--force-log", "-m", message, "--username", entry['author']]
    else:
        options = ["commit", "--force-log", "-m", message + "\nAuthor: " + entry['author']]
    if source_props:
        revprop_names = ('svn2svn:source_uuid', 'svn2svn:source_url', 'svn2svn:source_rev')
        revprop_values = (source_props[0], source_props[1], source_props[2])
        for name, value in zip(revprop_names, revprop_values):
            options += ["--with-revprop", name+"="+str(value)]
    if files:
        options += list(files)
    output = run_svn(options)
    if not output:
        return None
    # Scan the command output for the line announcing the new revision number.
    marker = 'Committed revision '
    rev = ""
    for line in output.strip("\n").split("\n"):
        if line.startswith(marker):
            rev = line[len(marker):].rstrip('.')
            break
    if rev:
        ui.status("Committed revision %s.", rev)
    return rev
68
def full_svn_revert():
    """
    Do an "svn revert" and proactively remove any extra files in the working copy.

    Runs "svn revert --recursive ." in the current directory, then deletes
    every path which "svn status" reports as unversioned ("?").
    """
    run_svn(["revert", "--recursive", "."])
    output = run_svn(["status"])
    if not output:
        return
    for line in output.strip("\n").split("\n"):
        # Use startswith() rather than line[0]: "svn status" can print empty
        # separator lines (e.g. around externals), and indexing an empty
        # string would raise IndexError.
        if not line.startswith("?"):
            continue
        path = line[4:].strip(" ")
        if os.path.isfile(path):
            os.remove(path)
        if os.path.isdir(path):
            shutil.rmtree(path)
84
def in_svn(p, require_in_repo=False, prefix=""):
    """
    Report whether the given file/folder is tracked by Subversion.

    The answer comes from the first "svn status" entry for the path (queried
    non-recursively), not from looking for ".svn" directories: with SVN 1.7+
    (WC-NG) there is only a single ".svn" at the root of the working copy.

    'require_in_repo' makes locally-added paths (status 'added', or no
    revision) count as not tracked.
    'prefix' is prepended to the debug status message.
    """
    entries = svnclient.get_svn_status(p, no_recursive=True)
    if not entries:
        return False
    info = entries[0]
    if require_in_repo and (info['status'] == 'added' or info['revision'] is None):
        # Caller needs the path to exist in the repository, so a path which
        # is only locally-added doesn't qualify.
        ret = False
    elif info['status'] == 'deleted':
        # Paths scheduled for deletion in the WC aren't considered tracked.
        ret = False
    else:
        # Locally added/copied paths do count as under source-control.
        ret = (info['type'] == 'normal' or info['status'] == 'added' or info['copied'] == 'true')
    ui.status(prefix + ">> in_svn('%s', require_in_repo=%s) --> %s", p, str(require_in_repo), str(ret), level=ui.DEBUG, color='GREEN')
    return ret
106
def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix = ""):
    """
    Given a source path, walk the SVN history backwards to inspect the ancestry of
    that path, seeing if it traces back to base_path. Build an array of copyfrom_path
    and copyfrom_revision pairs for each of the "svn copies". If we find a copyfrom_path
    which starts with base_path (e.g. we crawled back to the initial branch-
    copy from trunk), then return the collection of ancestor paths. Otherwise,
    copyfrom_path has no ancestry compared to base_path.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'svn_repos_url' is the full URL to the root of the SVN repository,
      e.g. 'file:///path/to/repo'
    'base_path' is the path in the SVN repo to the target path we're trying to
      trace ancestry back to, e.g. 'trunk'.
    'source_path' is the path in the SVN repo to the source path to start checking
      ancestry at, e.g. 'branches/fix1/projectA/file1.txt'.
      (full_path = svn_repos_url+base_path+"/"+path_offset)
    'source_rev' is the revision to start walking the history of source_path backwards from.
    'prefix' is prepended to every debug status message.

    Returns a list of {'path': ..., 'revision': ...} dicts. The first entry is
    base_path+"/"+source_path at source_rev, followed by one entry per copy-from
    step that was followed. Returns an empty list if no ancestor chain was found.

    Raises UnsupportedSVNAction if a changed-path action is not accepted by the
    'MARD' check below.
    """
    ui.status(prefix + ">> find_svn_ancestors: Start: (%s) source_path: %s base_path: %s",
        svn_repos_url, source_path+"@"+str(source_rev), base_path, level=ui.DEBUG, color='YELLOW')
    done = False
    working_path = base_path+"/"+source_path
    working_rev = source_rev
    first_iter_done = False
    ancestors_temp = []
    while not done:
        # Get the first "svn log" entry for this path (relative to @rev)
        ui.status(prefix + ">> find_svn_ancestors: %s", svn_repos_url + working_path+"@"+str(working_rev), level=ui.DEBUG, color='YELLOW')
        log_entry = svnclient.get_first_svn_log_entry(svn_repos_url + working_path+"@"+str(working_rev), 1, working_rev, True)
        if not log_entry:
            ui.status(prefix + ">> find_svn_ancestors: Done: no log_entry", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        # If we found a copy-from case which matches our base_path, we're done.
        # ...but only if we've at least tried to search for the first copy-from path.
        if first_iter_done and working_path.startswith(base_path):
            ui.status(prefix + ">> find_svn_ancestors: Done: Found working_path.startswith(base_path) and first_iter_done=True", level=ui.DEBUG, color='YELLOW')
            done = True
            break
        first_iter_done = True
        # Search for any actions on our target path (or parent paths).
        # NOTE(review): "path in working_path" is a substring test, not a
        # path-prefix test, so an unrelated path whose text happens to appear
        # inside working_path would also match -- confirm this is intended.
        changed_paths_temp = []
        for d in log_entry['changed_paths']:
            path = d['path']
            if path in working_path:
                changed_paths_temp.append({'path': path, 'data': d})
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the chain and this path has no ancestry back to base_path.
            ui.status(prefix + ">> find_svn_ancestors: Done: No matching changed_paths", level=ui.DEBUG, color='YELLOW')
            done = True
            continue
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=operator.itemgetter('path'), reverse=True)
        # Find the action for our working_path in this revision. Use a loop to check in reverse order,
        # so that we still catch the case where the target file/folder is "M" but has a parent folder
        # with an "A" copy-from.
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            # NOTE(review): "not in 'MARD'" is a substring test on the string
            # 'MARD', so values such as '' or 'MA' also pass this check.
            action = d['action']
            if action not in 'MARD':
                raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                    % (log_entry['revision'], action))
            ui.status(prefix + "> %s %s%s", action, path,
                (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                level=ui.DEBUG, color='YELLOW')
            if action == 'D':
                # If file/folder was deleted, it has no ancestor
                ancestors_temp = []
                ui.status(prefix + ">> find_svn_ancestors: Done: deleted", level=ui.DEBUG, color='YELLOW')
                done = True
                break
            if action in 'RA':
                # If file/folder was added/replaced but not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    ancestors_temp = []
                    ui.status(prefix + ">> find_svn_ancestors: Done: %s with no copyfrom_path",
                        "Added" if action == "A" else "Replaced",
                        level=ui.DEBUG, color='YELLOW')
                    done = True
                    break
                # Else, file/folder was added/replaced and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                ui.status(prefix + ">> find_svn_ancestors: Found copy-from (action=%s): %s --> %s",
                    action, path, d['copyfrom_path']+"@"+str(d['copyfrom_revision']),
                    level=ui.DEBUG, color='YELLOW')
                ancestors_temp.append({'path': path, 'revision': log_entry['revision'],
                    'copyfrom_path': d['copyfrom_path'], 'copyfrom_rev': d['copyfrom_revision']})
                # NOTE(review): str.replace() substitutes every occurrence of
                # d['path'] inside working_path, not only a leading one.
                working_path = working_path.replace(d['path'], d['copyfrom_path'])
                working_rev = d['copyfrom_revision']
                # Follow the copy and keep on searching
                break
    # Convert the collected copy-from steps into the chain of (path, revision)
    # pairs, starting from the original source path and re-applying each
    # path substitution in the order the copies were discovered.
    ancestors = []
    if ancestors_temp:
        ancestors.append({'path': base_path+"/"+source_path, 'revision': source_rev})
        working_path = base_path+"/"+source_path
        for idx in range(len(ancestors_temp)):
            d = ancestors_temp[idx]
            working_path = working_path.replace(d['path'], d['copyfrom_path'])
            working_rev = d['copyfrom_rev']
            ancestors.append({'path': working_path, 'revision': working_rev})
        # Width of the longest "path@rev" label, used only to align the debug output below.
        max_len = 0
        for idx in range(len(ancestors)):
            d = ancestors[idx]
            max_len = max(max_len, len(d['path']+"@"+str(d['revision'])))
        ui.status(prefix + ">> find_svn_ancestors: Found parent ancestors:", level=ui.DEBUG, color='YELLOW_B')
        for idx in range(len(ancestors)-1):
            d = ancestors[idx]
            d_next = ancestors[idx+1]
            ui.status(prefix + " [%s] %s <-- %s", idx,
                str(d['path']+"@"+str(d['revision'])).ljust(max_len),
                str(d_next['path']+"@"+str(d_next['revision'])).ljust(max_len),
                level=ui.DEBUG, color='YELLOW')
    else:
        ui.status(prefix + ">> find_svn_ancestors: No ancestor-chain found: %s",
            svn_repos_url+base_path+"/"+source_path+"@"+str(source_rev), level=ui.DEBUG, color='YELLOW')
    return ancestors
227
def get_rev_map(rev_map, source_rev, prefix):
    """
    Translate a source-repo revision number into the matching target-repo revision.

    Looks for the highest key in 'rev_map' which is less-than-or-equal-to
    'source_rev' and returns its value as an int. Returns None if no such
    key exists (i.e. we walked down to revision 1 without a match).
    'prefix' is prepended to the debug status messages.
    """
    ui.status(prefix + ">> get_rev_map(%s)", source_rev, level=ui.DEBUG, color='GREEN')
    candidate = int(source_rev)
    while candidate > 0:
        is_mapped = candidate in rev_map
        ui.status(prefix + ">> get_rev_map: rev=%s in_rev_map=%s", candidate, str(is_mapped), level=ui.DEBUG, color='BLACK_B')
        if is_mapped:
            return int(rev_map[candidate])
        candidate -= 1
    # Fell off the bottom of the rev_map without finding anything.
    return None
240
def set_rev_map(rev_map, source_rev, target_rev):
    """
    Record in 'rev_map' that source-repo revision 'source_rev' corresponds to
    target-repo revision 'target_rev'. Both are stored as ints.
    """
    ui.status(">> set_rev_map: source_rev=%s target_rev=%s", source_rev, target_rev, level=ui.DEBUG, color='GREEN')
    target_value = int(target_rev)
    rev_map[int(source_rev)] = target_value
244
def build_rev_map(target_url, source_info):
    """
    Check for any already-replayed history from source_url (source_info) and
    build the mapping-table of source_rev -> target_rev.

    'target_url' is the URL whose log is scanned, from revision 1 to HEAD.
    'source_info' is a dict; its 'repos_uuid' and 'url' values are compared
    against the svn2svn:source_uuid / svn2svn:source_url revision properties
    of each target revision.

    Returns a dict mapping int source revision -> int target revision.
    """
    required_props = ('svn2svn:source_uuid', 'svn2svn:source_url', 'svn2svn:source_rev')
    rev_map = {}
    ui.status("Rebuilding rev_map...", level=ui.VERBOSE)
    it_log_entries = svnclient.iter_svn_log_entries(target_url, 1, 'HEAD', get_changed_paths=False, get_revprops=True)
    for log_entry in it_log_entries:
        if not log_entry['revprops']:
            continue
        revprops = {}
        for v in log_entry['revprops']:
            if v['name'].startswith('svn2svn:'):
                revprops[v['name']] = v['value']
        # A revision may carry revprops without any of the svn2svn:* tracking
        # ones (e.g. it was not committed by this tool). Skip those rather
        # than failing with a KeyError.
        if not all(name in revprops for name in required_props):
            continue
        if revprops['svn2svn:source_uuid'] == source_info['repos_uuid'] and \
           revprops['svn2svn:source_url'] == source_info['url']:
            source_rev = revprops['svn2svn:source_rev']
            target_rev = log_entry['revision']
            set_rev_map(rev_map, source_rev, target_rev)
    return rev_map
266
def get_svn_dirlist(svn_path, svn_rev = ""):
    """
    Return the entries printed by "svn list" for the given folder path.

    When 'svn_rev' is given, the listing is requested with "-r <rev>" and the
    path is pegged as "<path>@<rev>". Returns a list with one string per
    output line, or an empty list when the command output is at most one
    character long.
    """
    if svn_rev:
        args = ["list", "-r", svn_rev, svn_path + "@"+str(svn_rev)]
    else:
        args = ["list", svn_path]
    listing = run_svn(args, no_fail=True)
    if len(listing)>1:
        return listing.strip("\n").split("\n")
    return []
280
281 def _add_export_path(export_paths, path_offset):
282 found = False
283 for p in export_paths:
284 if path_offset.startswith(p):
285 found = True
286 break
287 if not found:
288 export_paths.append(path_offset)
289 return export_paths
290
def do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev, \
               parent_copyfrom_path="", parent_copyfrom_rev="", export_paths=None, \
               rev_map=None, is_dir = False, prefix = ""):
    """
    Given the add'd source path, replay the "svn add/copy" commands to correctly
    track renames across copy-from's.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2 " Rename folder
    4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt " Rename file inside renamed folder
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'path_offset' is the offset from source_base to the file to check ancestry for,
      e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'target_url' is the full URL to the target path in the target repository.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'parent_copyfrom_path' and 'parent_copyfrom_rev' is the copy-from path of the parent
      directory, when being called recursively by do_svn_add_dir().
    'export_paths' is the list of path_offset's that we've deferred running "svn export" on.
      It is extended in place; a fresh list is used when omitted.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s. An empty mapping is used when omitted.
    'is_dir' is whether path_offset is a directory (rather than a file).
    'prefix' is prepended to the debug status messages.
    """
    # Use None sentinels instead of mutable default arguments. The previous
    # default for export_paths was a dict, which cannot be appended to.
    if export_paths is None:
        export_paths = []
    if rev_map is None:
        rev_map = {}
    source_base = source_url[len(source_repos_url):]
    ui.status(prefix + ">> do_svn_add: %s %s", source_base+"/"+path_offset+"@"+str(source_rev),
        " (parent-copyfrom: "+parent_copyfrom_path+"@"+str(parent_copyfrom_rev)+")" if parent_copyfrom_path else "",
        level=ui.DEBUG, color='GREEN')
    # Check if the given path has ancestors which chain back to the current source_base
    found_ancestor = False
    tgt_rev = None
    ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset, source_rev, prefix+" ")
    # ancestors[n] is the original (pre-branch-copy) trunk path.
    # ancestors[n-1] is the first commit on the new branch.
    copyfrom_path = ancestors[len(ancestors)-1]['path'] if ancestors else ""
    copyfrom_rev = ancestors[len(ancestors)-1]['revision'] if ancestors else ""
    if ancestors:
        # The copy-from path has ancestry back to source_url.
        ui.status(prefix + ">> do_svn_add: Check copy-from: Found parent: %s", copyfrom_path+"@"+str(copyfrom_rev),
            level=ui.DEBUG, color='GREEN', bold=True)
        found_ancestor = True
        # Map the copyfrom_rev (source repo) to the equivalent target repo rev #. This can
        # return None in the case where copyfrom_rev is *before* our source_start_rev.
        tgt_rev = get_rev_map(rev_map, copyfrom_rev, prefix+" ")
        ui.status(prefix + ">> do_svn_add: get_rev_map: %s (source) -> %s (target)", copyfrom_rev, tgt_rev, level=ui.DEBUG, color='GREEN')
    else:
        ui.status(prefix + ">> do_svn_add: Check copy-from: No ancestor chain found.", level=ui.DEBUG, color='GREEN')
        found_ancestor = False
    if found_ancestor and tgt_rev:
        # Check if this path_offset in the target WC already has this ancestry, in which
        # case there's no need to run the "svn copy" (again).
        path_in_svn = in_svn(path_offset, prefix=prefix+" ")
        log_entry = svnclient.get_last_svn_log_entry(path_offset, 1, 'HEAD', get_changed_paths=False) if in_svn(path_offset, require_in_repo=True, prefix=prefix+" ") else []
        if (not log_entry or (log_entry['revision'] != tgt_rev)):
            copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
            ui.status(prefix + ">> do_svn_add: svn_copy: Copy-from: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " copyfrom: %s", copyfrom_path+"@"+str(copyfrom_rev), level=ui.DEBUG, color='GREEN')
            ui.status(prefix + " p_copyfrom: %s", parent_copyfrom_path+"@"+str(parent_copyfrom_rev) if parent_copyfrom_path else "", level=ui.DEBUG, color='GREEN')
            if path_in_svn and \
               ((parent_copyfrom_path and copyfrom_path.startswith(parent_copyfrom_path)) and \
                (parent_copyfrom_rev and copyfrom_rev == parent_copyfrom_rev)):
                # When being called recursively, if this child entry has the same ancestor as
                # the parent, then no need to try to run another "svn copy".
                ui.status(prefix + ">> do_svn_add: svn_copy: Same ancestry as parent: %s",
                    parent_copyfrom_path+"@"+str(parent_copyfrom_rev),level=ui.DEBUG, color='GREEN')
            else:
                # Copy this path from the equivalent path+rev in the target repo, to create the
                # equivalent history.
                if parent_copyfrom_path:
                    # If we have a parent copy-from path, we mis-match that so display a status
                    # message describing the action we're mimic'ing. If path_in_svn, then this
                    # is logically a "replace" rather than an "add".
                    ui.status(" %s %s (from %s)", ('R' if path_in_svn else 'A'), source_base+"/"+path_offset, ancestors[1]['path']+"@"+str(copyfrom_rev), level=ui.VERBOSE)
                if path_in_svn:
                    # If local file is already under version-control, then this is a replace.
                    ui.status(prefix + ">> do_svn_add: pre-copy: local path already exists: %s", path_offset, level=ui.DEBUG, color='GREEN')
                    run_svn(["remove", "--force", path_offset])
                run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset])
                # Export the final version of this file/folder from the source repo, to make
                # sure we're up-to-date.
                export_paths = _add_export_path(export_paths, path_offset)
        else:
            ui.status(prefix + ">> do_svn_add: Skipped 'svn copy': %s", path_offset, level=ui.DEBUG, color='GREEN')
    else:
        # Else, either this copy-from path has no ancestry back to source_url OR copyfrom_rev comes
        # before our initial source_start_rev (i.e. tgt_rev == None), so can't do a "svn copy".
        # Create (parent) directory if needed.
        # TODO: This is (nearly) a duplicate of code in process_svn_log_entry(). Should this be
        # split-out to a shared tag?
        p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or '.'
        if not os.path.exists(p_path):
            run_svn(["mkdir", p_path])
        if not in_svn(path_offset, prefix=prefix+" "):
            if is_dir:
                # Export the final version of all files in this folder.
                export_paths = _add_export_path(export_paths, path_offset)
            else:
                # Export the final version of this file. We *need* to do this before running
                # the "svn add", even if we end-up re-exporting this file again via export_paths.
                run_svn(["export", "--force", "-r", source_rev,
                         source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])
            # If not already under version-control, then "svn add" this file/folder.
            run_svn(["add", "--parents", path_offset])
        # TODO: Need to copy SVN properties from source repos
    if is_dir:
        # For any folders that we process, process any child contents, so that we correctly
        # replay copies/replaces/etc.
        do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url,
                       copyfrom_path, copyfrom_rev, export_paths, rev_map, prefix+" ")
408
def do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url, \
                   parent_copyfrom_path, parent_copyfrom_rev, export_paths, rev_map, prefix=""):
    """
    Replay the contents of an added folder, one child entry at a time.

    Lists the folder in the local working copy and in the source repo (at
    'source_rev'), calls do_svn_add() for every entry in the source listing,
    and then "svn remove"s every local entry which the source listing lacks.
    The remaining arguments are handed through to do_svn_add() unchanged.
    """
    source_base = source_url[len(source_repos_url):]
    # Fetch both directory listings: local WC (target_url) vs. remote repo (source_url).
    # TODO: the local listing won't include add'd paths because "svn ls" lists the contents
    # of the associated remote repo folder. (Is this a problem?)
    local_entries = get_svn_dirlist(path_offset)
    remote_entries = get_svn_dirlist(source_url+"/"+path_offset, source_rev)
    ui.status(prefix + ">> do_svn_add_dir: paths_local: %s", str(local_entries), level=ui.DEBUG, color='GREEN')
    ui.status(prefix + ">> do_svn_add_dir: paths_remote: %s", str(remote_entries), level=ui.DEBUG, color='GREEN')
    # Add/update everything that the remote folder contains.
    for entry in remote_entries:
        entry_is_dir = (entry[-1] == "/")
        if entry_is_dir:
            child_offset = path_offset+"/"+entry.rstrip('/')
        else:
            child_offset = path_offset+"/"+entry
        do_svn_add(source_repos_url, source_url, child_offset, target_url, source_rev,
                   parent_copyfrom_path, parent_copyfrom_rev, export_paths,
                   rev_map, entry_is_dir, prefix+" ")
    # Drop everything which only exists locally.
    # TODO: Does this handle deleted folders too? Wouldn't want to have a case
    # where we only delete all files from folder but leave orphaned folder around.
    stale_entries = [entry for entry in local_entries if entry not in remote_entries]
    for entry in stale_entries:
        ui.status(" %s %s", 'D', source_base+"/"+path_offset+"/"+entry, level=ui.VERBOSE)
        run_svn(["remove", "--force", path_offset+"/"+entry])
433
def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url, \
                          rev_map, commit_paths = None, prefix = ""):
    """
    Process SVN changes from the given log entry.
    Returns array of all the paths in the working-copy that were changed,
    i.e. the paths which need to be "svn commit".

    'log_entry' is the array structure built by parse_svn_log_xml().
    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s.
    'commit_paths' is the working list of specific paths which changes to pass
      to the final "svn commit". It is extended in place (up to 100 entries)
      and returned; a fresh list is used when omitted.
    'prefix' is prepended to the debug status messages.

    Raises UnsupportedSVNAction for a changed-path whose action is not one of
    'M', 'A', 'R' or 'D'.
    """
    # Use a None sentinel instead of a mutable default: this list is appended
    # to below, so a shared default would leak paths from one call to the next.
    if commit_paths is None:
        commit_paths = []
    removed_paths = []
    export_paths = []
    # Get the relative offset of source_url based on source_repos_url
    # e.g. '/branches/bug123'
    source_base = source_url[len(source_repos_url):]
    source_rev = log_entry['revision']
    ui.status(prefix + ">> process_svn_log_entry: %s", source_url+"@"+str(source_rev), level=ui.DEBUG, color='GREEN')
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not path.startswith(source_base + "/"):
            # Ignore changed files that are not part of this subdir
            if path != source_base:
                ui.status(prefix + ">> process_svn_log_entry: Unrelated path: %s (base: %s)", path, source_base, level=ui.DEBUG, color='GREEN')
            continue
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path. Compare against the individual action
        # codes: a substring test against 'MARD' would also accept values
        # such as '' or 'MA'.
        action = d['action']
        if action not in ('M', 'A', 'R', 'D'):
            raise UnsupportedSVNAction("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                % (source_rev, action))
        if action != 'D':
            # (Note: Skip displaying action message for 'D' here since we'll display that
            # message when we process the deferred delete actions at the end.)
            ui.status(" %s %s%s", action, d['path'],
                (" (from %s)" % (d['copyfrom_path']+"@"+str(d['copyfrom_revision']))) if d['copyfrom_path'] else "",
                level=ui.VERBOSE)

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        # The list is capped at 100 entries here.
        if len (commit_paths) < 100:
            commit_paths.append(path_offset)

        # Special-handling for replace's
        if action == 'R':
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            # If we have any queued deletions for this same path, remove those if we're re-adding this path.
            if path_offset in removed_paths:
                removed_paths.remove(path_offset)
            path_is_dir = True if d['kind'] == 'dir' else False
            # Handle cases where this "add" was a copy from another URL in the source repos
            if d['copyfrom_revision']:
                do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev,
                           "", "", export_paths, rev_map, path_is_dir, prefix+" ")
            # Else just "svn export" the files from the source repo and "svn add" them.
            else:
                # Create (parent) directory if needed
                p_path = path_offset if path_is_dir else os.path.dirname(path_offset).strip() or '.'
                if not os.path.exists(p_path):
                    run_svn(["mkdir", p_path])
                # Export the entire added tree.
                if path_is_dir:
                    export_paths = _add_export_path(export_paths, path_offset)
                else:
                    # Export the final version of this file. We *need* to do this before running
                    # the "svn add", even if we end-up re-exporting this file again via export_paths.
                    run_svn(["export", "--force", "-r", source_rev,
                             source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])
                if not in_svn(path_offset, prefix=prefix+" "):
                    # Need to use in_svn here to handle cases where client committed the parent
                    # folder and each indiv sub-folder.
                    run_svn(["add", "--parents", path_offset])
                # TODO: Need to copy SVN properties from source repos

        elif action == 'D':
            # Queue "svn remove" commands, to allow the action == 'A' handling the opportunity
            # to do smart "svn copy" handling on copy/move/renames.
            if not path_offset in removed_paths:
                removed_paths.append(path_offset)

        elif action == 'M':
            # TODO: Is "svn merge -c" correct here? Should this just be an "svn export" plus
            # proplist updating?
            run_svn(["merge", "-c", source_rev, "--non-recursive",
                     "--non-interactive", "--accept=theirs-full",
                     source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

        else:
            # NOTE(review): SVNError is not among the names imported at the top
            # of this file as far as is visible here -- confirm it is defined.
            raise SVNError("Internal Error: process_svn_log_entry: Unhandled 'action' value: '%s'"
                % action)

    # Process any deferred removed actions
    if removed_paths:
        path_base = source_url[len(source_repos_url):]
        for path_offset in removed_paths:
            ui.status(" %s %s", 'D', path_base+"/"+path_offset, level=ui.VERBOSE)
            run_svn(["remove", "--force", path_offset])
    # Export the final version of all add'd paths from source_url
    if export_paths:
        for path_offset in export_paths:
            run_svn(["export", "--force", "-r", source_rev,
                     source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])

    return commit_paths
564
def disp_svn_log_summary(log_entry):
    """
    Print a summary of the given log entry: a blank line, a
    "r<rev> | <author> | <date>" header (date rendered in local time),
    the commit message and a separator line.
    """
    ui.status("")
    revision = log_entry['revision']
    author = log_entry['author']
    committed_at = datetime.fromtimestamp(int(log_entry['date']))
    ui.status("r%s | %s | %s", revision, author, str(committed_at.isoformat(' ')))
    ui.status(log_entry['message'])
    ui.status("------------------------------------------------------------------------")
573
def pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url, target_url, rev_map, keep_author=False):
    """
    Replay the given source log entry into the working copy and commit it.

    Displays the log summary, applies the entry's changes via
    process_svn_log_entry(), then commits with the source repo UUID, source
    URL and source revision attached as source-tracking revprops.
    Returns whatever commit_from_svn_log_entry() returns (the new SVN revision).
    """
    disp_svn_log_summary(log_entry)
    source_rev = log_entry['revision']

    # Replay every changed path, collecting the touched working-copy paths.
    changed = []
    process_svn_log_entry(log_entry, source_repos_url, source_url, target_url,
                          rev_map, changed)
    # With too many individual paths, commit from the root of the working
    # copy instead (i.e. pass no explicit paths).
    paths_to_commit = changed if len(changed) < 100 else []

    # Source-tracking revprop's: (uuid, url, rev)
    tracking_props = [source_repos_uuid, source_url, source_rev]
    return commit_from_svn_log_entry(log_entry, paths_to_commit, keep_author=keep_author, source_props=tracking_props)
595
def run_parser(parser):
    """
    Register the common options on an OptionParser instance and parse the
    command line.

    Adds --version, a replacement -h/--help, -v/--verbose and --debug.
    Prints the help text or the program version and exits with status 0 when
    requested; otherwise hands the options to ui.update_config() and returns
    the (options, args) pair.
    """
    parser.add_option("", "--version", dest="show_version", action="store_true",
                      help="show version and exit")
    # Swap the built-in --help for one we handle ourselves below.
    parser.remove_option("--help")
    parser.add_option("-h", "--help", dest="show_help", action="store_true",
                      help="show this help message and exit")
    parser.add_option("-v", "--verbose", dest="verbosity", const=ui.VERBOSE,
                      default=10, action="store_const",
                      help="enable additional output")
    parser.add_option("--debug", dest="verbosity", const=ui.DEBUG,
                      action="store_const",
                      help="enable debugging output")
    parsed = parser.parse_args()
    options = parsed[0]
    args = parsed[1]
    if options.show_help:
        parser.print_help()
        sys.exit(0)
    if options.show_version:
        prog_name = os.path.basename(sys.argv[0])
        sys.stdout.write("%s %s\n" % (prog_name, full_version))
        sys.exit(0)
    ui.update_config(options)
    return options, args
621
def display_parser_error(parser, message):
    """
    Report an options error: print "error: <message>", a blank line and the
    parser's help text, then exit with status 1.
    """
    sys.stdout.write("error: %s\n" % (message,))
    sys.stdout.write("\n")
    parser.print_help()
    sys.exit(1)
630
def _find_first_source_log_entry(source_url, last_rev, chunk_size=1000):
    """
    Return the earliest log entry found for source_url, or None.

    Revisions 1 through last_rev are scanned in windows of chunk_size
    revisions, asking for at most one log entry per window. The start of
    each empty window is written out as progress.
    """
    rev = 1
    while True:
        entries = svnclient.run_svn_log(source_url, rev, min(rev+chunk_size-1, last_rev), 1, get_changed_paths=False)
        if entries:
            return entries[0]
        ui.status("...%s...", rev)
        rev += chunk_size
        if rev > last_rev:
            return None

def _svn_version_at_least(version, minimum):
    """
    Tell whether an SVN client version is at least `minimum`.

    Both arguments are sequences of version components, most significant
    first; only as many components as `minimum` has are compared. The
    components are compared as integers, so that e.g. 1.10 ranks above 1.7
    (comparing them as the float 1.10 would not).
    """
    return tuple(int(part) for part in version[:len(minimum)]) >= tuple(minimum)

def real_main(options, args):
    """
    Replay the history of a source SVN URL into a target SVN URL.

    args must hold the source URL followed by the target URL; both are
    popped from the list. options supplies keep_author, cont_from_break and
    svn_rev.

    A working copy of the target is used at "_wc_target" under the current
    directory, and the process changes into it. Unless continue-mode is
    requested, an existing working copy is deleted and checked out afresh,
    and the content of the source at the starting revision is exported and
    committed as the initial import. In continue-mode the source->target
    revision map is rebuilt from the target repository instead. All later
    source revisions are then replayed one at a time.

    Errors raised while replaying are printed and the working copy is
    reverted; they are not re-raised. Raises RuntimeError when no starting
    revision can be found, or when continue-mode finds no already-replayed
    history. Returns None.
    """
    source_url = args.pop(0).rstrip("/")
    target_url = args.pop(0).rstrip("/")
    keep_author = bool(options.keep_author)

    # Make sure that both the source and target URL's are valid
    source_info = svnclient.get_svn_info(source_url)
    assert source_url.startswith(source_info['repos_url'])
    target_info = svnclient.get_svn_info(target_url)
    assert target_url.startswith(target_info['repos_url'])

    source_end_rev = source_info['revision']       # Last revision # for the source repo
    source_repos_url = source_info['repos_url']    # Base URL for the source repo, e.g. 'svn://svn.example.com/svn/repo'
    source_repos_uuid = source_info['repos_uuid']  # UUID for the source repo

    wc_target = os.path.abspath('_wc_target')
    rev_map = {}

    # Check out a working copy of target_url if needed. A left-over working
    # copy is only re-used when continuing from a previous break.
    wc_exists = os.path.exists(wc_target)
    if wc_exists and not options.cont_from_break:
        shutil.rmtree(wc_target)
        wc_exists = False
    if not wc_exists:
        svnclient.svn_checkout(target_url, wc_target)
    os.chdir(wc_target)

    if not options.cont_from_break:
        # Get log entry for the SVN revision we will check out
        if options.svn_rev:
            # If specify a rev, get log entry just before or at rev
            source_start_log = svnclient.get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
        else:
            # Otherwise, get log entry of branch creation
            # Note: Trying to use svnclient.get_first_svn_log_entry(source_url, 1, source_end_rev, False)
            # ends-up being *VERY* time-consuming on a repo with lots of revisions. Even though
            # the "svn log" call is passing --limit 1, it seems like that limit-filter is happening
            # _after_ svn has fetched the full log history. Instead, search the history in chunks
            # and write some progress to the screen.
            ui.status("Searching for start source revision (%s)...", source_url, level=ui.VERBOSE)
            # The search covers the *source* history, so it is bounded by the
            # source repo's last revision (not the target's).
            source_start_log = _find_first_source_log_entry(source_url, source_end_rev)
        if not source_start_log:
            raise RuntimeError("Unable to find first revision for source_url: %s" % source_url)

        # This is the revision we will start from for source_url
        source_start_rev = source_rev = int(source_start_log['revision'])
        ui.status("Starting at source revision %s.", source_start_rev, level=ui.VERBOSE)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        paths = run_svn(["list", "-r", source_rev, source_url+"@"+str(source_rev)])
        # "paths" is still the raw "svn list" output here; only import when
        # it holds more than a single character.
        if len(paths) > 1:
            disp_svn_log_summary(svnclient.get_one_svn_log_entry(source_url, source_rev, source_rev))
            ui.status("(Initial import)", level=ui.VERBOSE)
            for path_raw in paths.strip("\n").split("\n"):
                # For each top-level file/folder...
                if not path_raw:
                    continue
                # Directories have a trailing slash in the "svn list" output
                path_is_dir = path_raw.endswith("/")
                path = path_raw.rstrip('/') if path_is_dir else path_raw
                if path_is_dir and not os.path.exists(path):
                    os.makedirs(path)
                ui.status(" A %s", source_url[len(source_repos_url):]+"/"+path, level=ui.VERBOSE)
                run_svn(["export", "--force", "-r", source_rev, source_url+"/"+path+"@"+str(source_rev), path])
                run_svn(["add", path])
            source_props = [source_repos_uuid, source_url, source_rev]
            target_rev = commit_from_svn_log_entry(source_start_log, [], keep_author=keep_author, source_props=source_props)
            if target_rev:
                set_rev_map(rev_map, source_rev, target_rev)
    else:
        # Re-build the rev_map based on any already-replayed history in target_url
        rev_map = build_rev_map(target_url, source_info)
        if not rev_map:
            raise RuntimeError("Called with continue-mode, but no already-replayed history found in target repo: %s" % target_url)
        # Resume after the source revision that maps to the highest target revision
        source_start_rev = int(max(rev_map, key=rev_map.get))
        assert source_start_rev
        ui.status("Continue from source revision %s.", source_start_rev, level=ui.VERBOSE)

    commit_count = 0
    # Compare version components as integers: SVN 1.10+ must count as >= 1.7
    cleanup_pristines = _svn_version_at_least(svnclient.get_svn_client_version(), (1, 7))

    # Load SVN log starting from source_start_rev + 1
    it_log_entries = svnclient.iter_svn_log_entries(source_url, source_start_rev+1, source_end_rev)

    try:
        for log_entry in it_log_entries:
            # Replay this revision from source_url into target_url
            target_rev = pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url,
                                      target_url, rev_map, keep_author)
            # Update our target working-copy, to ensure everything says it's at the new HEAD revision
            run_svn(["update"])
            commit_count += 1
            # Run "svn cleanup" every 100 commits if SVN 1.7+, to clean-up orphaned ".svn/pristines/*"
            if cleanup_pristines and (commit_count % 100 == 0):
                run_svn(["cleanup"])
            # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
            if target_rev:
                source_rev = log_entry['revision']
                set_rev_map(rev_map, source_rev, target_rev)

    except KeyboardInterrupt:
        print("\nStopped by user.")
        run_svn(["cleanup"])
        full_svn_revert()
    except:
        # Deliberately broad: whatever went wrong, report it and put the
        # working copy back into a clean state instead of propagating.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        run_svn(["cleanup"])
        print(run_svn(["status"]))
        full_svn_revert()
    finally:
        run_svn(["update"])
        print("\nFinished!")
762
def main():
    """
    Command-line entry point. Must be callable without arguments.

    Builds the option parser for this tool, parses the command line via
    run_parser(), insists on exactly two positional arguments (source_url
    and target_url) and returns whatever real_main() returns.
    """
    parser = OptionParser("Usage: %prog [OPTIONS] source_url target_url")

    # (option strings, add_option() settings), registered in this order
    option_table = (
        (("-r", "--revision"),
         dict(type="int", dest="svn_rev", metavar="REV",
              help="initial SVN revision to checkout from")),
        (("-a", "--keep-author"),
         dict(action="store_true", dest="keep_author",
              help="maintain original Author info from source repo")),
        (("-c", "--continue"),
         dict(action="store_true", dest="cont_from_break",
              help="continue from previous break")),
    )
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    options, args = run_parser(parser)
    if len(args) != 2:
        display_parser_error(parser, "incorrect number of arguments")
    return real_main(options, args)
777
778
# Script entry: exit with main()'s result, or 0 when it returns nothing.
if __name__ == "__main__":
    raise SystemExit(main() or 0)