]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn.py
Initial rev_map support handling and better svn-copy handling
[svn2svn.git] / svn2svn.py
1 #!/usr/bin/env python
2 """
3 svn2svn.py
4
5 Replicate (replay) changesets from one SVN repository to another:
6 * Maintains full logical history (e.g. uses "svn copy" for renames).
7 * Maintains original commit messages.
8 * Cannot maintain original commit date, but appends original commit date
9 for each commit message: "Date: %d".
10 * Optionally maintain source author info. (Only supported if accessing
11 target SVN repo via file://)
12 * Optionally run an external shell script before each replayed commit
13 to give the ability to dynamically exclude or modify files as part
14 of the replay.
15
16 License: GPLv2, the same as hgsvn.
17 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
18 (This is a forked and modified version of http://code.google.com/p/svn2svn/)
19 """
20
21 import os
22 import sys
23 import time
24 import locale
25 import shutil
26 import select
27 import calendar
28 import traceback
29
30 from optparse import OptionParser
31 from subprocess import Popen, PIPE
32 from datetime import datetime
33 from operator import itemgetter
34
35 try:
36 from xml.etree import cElementTree as ET
37 except ImportError:
38 try:
39 from xml.etree import ElementTree as ET
40 except ImportError:
41 try:
42 import cElementTree as ET
43 except ImportError:
44 from elementtree import ElementTree as ET
45
# Base argument lists for the "svn" sub-commands this script runs.
svn_log_args = ["log", "--xml"]
svn_info_args = ["info", "--xml"]
svn_checkout_args = ["checkout", "-q"]
svn_status_args = ["status", "--xml", "-v", "--ignore-externals"]

# Debug switches
debug = False                # Print the internal ">> ..." trace messages?
debug_runsvn_timing = False  # Display how long each "svn" OS command took to run?
# Verbosity switches
runsvn_showcmd = False       # Display every "svn" OS command we run?
runsvn_showout = False       # Display the stdout results from every "svn" OS command we run?
svnlog_verbose = True        # Display each action + changed-path as we walk the history?
58
59 # define exception class
class ExternalCommandFailed(RuntimeError):
    """Signals that an external command did not complete successfully."""
64
def display_error(message, raise_exception = True):
    """
    Print an error message followed by a blank line, then abort.

    When 'raise_exception' is true (the default), ExternalCommandFailed is
    raised; otherwise the process exits with status 1.
    """
    print("Error: %s" % (message,))
    print("")
    if not raise_exception:
        sys.exit(1)
    raise ExternalCommandFailed
75
76 # Windows compatibility code by Bill Baxter
if os.name != "nt":
    def find_program(name):
        """
        Find the name of the program for Popen.
        On Unix the name is returned unchanged, since popen isn't picky
        about having absolute paths.
        """
        return name
else:
    # Windows compatibility code by Bill Baxter
    def find_program(name):
        """
        Find the name of the program for Popen.
        Windows is finicky about having the complete file name, and Popen
        won't search the %PATH% for you automatically, so walk each %PATH%
        directory and return the first existing match (or None).
        (Adapted from ctypes.find_library)
        """
        # See MSDN for the REAL search order.
        stem, suffix = os.path.splitext(name)
        candidates = ['.bat', '.exe']
        if suffix:
            candidates = [suffix]
        for folder in os.environ['PATH'].split(os.pathsep):
            for candidate in candidates:
                full_name = os.path.join(folder, stem + candidate)
                if os.path.exists(full_name):
                    return full_name
        return None
104
def shell_quote(s):
    """
    Quote a single command-line argument for display purposes.

    When 'runsvn_showcmd' is enabled, arguments made up only of letters,
    digits, '=' and '-' are returned unquoted. Everything else is wrapped in
    double quotes on Windows or single quotes elsewhere, with backslashes
    doubled and single quotes rewritten as '"'"'.
    """
    if runsvn_showcmd:
        import re
        if re.compile('^[A-Za-z0-9=-]+$').match(s):
            return s
    quote_char = "'"
    if os.name == "nt":
        quote_char = '"'
    escaped = s.replace('\\', '\\\\')
    escaped = escaped.replace("'", "'\"'\"'")
    return quote_char + escaped + quote_char
116
# Encoding of the user's locale; run_svn() falls back to it for unicode
# arguments when it is called with an empty 'encoding'.
locale_encoding = locale.getpreferredencoding()
118
def run_svn(args, fail_if_stderr=False, encoding="utf-8"):
    """
    Run the "svn" program with the given arguments and return its stdout.

    Unicode arguments are encoded with 'encoding' (or the locale encoding if
    'encoding' is empty); other non-string arguments are passed through str().
    display_error() is called if svn exits with a non-zero return code, or if
    'fail_if_stderr' is set and svn wrote anything to stderr.
    """
    def _as_argument(value):
        if isinstance(value, unicode):
            return value.encode(encoding or locale_encoding)
        if isinstance(value, str):
            return value
        return str(value)

    t_args = [_as_argument(a) for a in args]
    svn_exe = find_program("svn")
    argv = [svn_exe] + t_args
    cmd_string = str(" ".join([shell_quote(part) for part in argv]))
    if runsvn_showcmd:
        # Echo the command in blue (ANSI escape codes)
        print("\x1b[34m$ " + cmd_string + "\x1b[0m")
    if debug_runsvn_timing:
        started = time.time()
    proc = Popen(argv, executable=svn_exe, stdout=PIPE, stderr=PIPE)
    out, err = proc.communicate()
    if debug_runsvn_timing:
        finished = time.time()
        print("(" + str(round(finished - started, 4)) + " elapsed)")
    if runsvn_showout and out:
        print(out)
    failed = proc.returncode != 0
    if not failed and fail_if_stderr:
        failed = bool(err.strip())
    if failed:
        display_error("External program failed (return code %d): %s\n%s"
                      % (proc.returncode, cmd_string, err))
    return out
149
def svn_date_to_timestamp(svn_date):
    """
    Convert an SVN date string (as found in the XML output) into the
    corresponding timestamp (seconds since the epoch).
    """
    # Keep only the part before the first '.', i.e. drop the microseconds and
    # the timezone suffix (always UTC, hopefully).
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    whole_seconds = svn_date.split('.')[0]
    parsed = time.strptime(whole_seconds, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(parsed)
161
def parse_svn_info_xml(xml_string):
    """
    Parse the XML output from an "svn info" command and extract
    useful information as a dict.

    The returned dict has the keys 'url', 'revision', 'repos_url',
    'repos_uuid', 'last_changed_rev' and 'kind'. It is empty when the XML
    contains no <entry> element.
    """
    d = {}
    tree = ET.fromstring(xml_string)
    entry = tree.find('.//entry')
    # Compare against None explicitly: the truth value of an Element reflects
    # whether it has children (and testing it is deprecated), not whether
    # find() located it.
    if entry is not None:
        d['url'] = entry.find('url').text
        d['revision'] = int(entry.get('revision'))
        d['repos_url'] = tree.find('.//repository/root').text
        d['repos_uuid'] = tree.find('.//repository/uuid').text
        d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
        d['kind'] = entry.get('kind')
    return d
178
def parse_svn_log_xml(xml_string):
    """
    Turn the XML output of an "svn log" command into a list of dicts, one per
    <logentry>, with the keys 'revision', 'author', 'date', 'message',
    'revprops' and 'changed_paths'.
    """
    entries = []
    root = ET.fromstring(xml_string)
    for logentry in root.findall('logentry'):
        revision = int(logentry.get('revision'))
        # Some revisions don't have authors, most notably
        # the first revision in a repository.
        author = None
        author_node = logentry.find('author')
        if author_node is not None and author_node.text:
            author = author_node.text
        timestamp = svn_date_to_timestamp(logentry.find('date').text)
        # Some revisions may have an empty commit message
        message = ""
        msg_node = logentry.find('msg')
        if msg_node is not None and msg_node.text is not None:
            message = msg_node.text.strip() or ""
        # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
        message = message.replace('\r\n', '\n')
        message = message.replace('\n\r', '\n')
        message = message.replace('\r', '\n')
        revprops = [{ 'name': prop.get('name'), 'value': prop.text }
                    for prop in logentry.findall('.//revprops/property')]
        changed = []
        for path_node in logentry.findall('.//paths/path'):
            copyfrom_rev = path_node.get('copyfrom-rev')
            if copyfrom_rev:
                copyfrom_rev = int(copyfrom_rev)
            changed.append({
                'path': path_node.text,
                'kind': path_node.get('kind'),
                'action': path_node.get('action'),
                'copyfrom_path': path_node.get('copyfrom-path'),
                'copyfrom_revision': copyfrom_rev,
                })
        # Need to sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
        # can process actions in depth-first order.
        changed.sort(key=itemgetter('path'))
        entries.append({
            'revision': revision,
            'author': author,
            'date': timestamp,
            'message': message,
            'revprops': revprops,
            'changed_paths': changed,
            })
    return entries
222
def parse_svn_status_xml(xml_string, base_dir=None):
    """
    Turn the XML output of an "svn status" command into a list of dicts, one
    per status entry, each with a 'path' and a 'type' ('external', 'deleted',
    'normal' or 'unversioned').

    When 'base_dir' is given, every path must start with it and is returned
    relative to it.
    """
    results = []
    root = ET.fromstring(xml_string)
    for node in root.findall('.//entry'):
        rel_path = node.get('path')
        if base_dir is not None:
            assert rel_path.startswith(base_dir)
            rel_path = rel_path[len(base_dir):].lstrip('/\\')
        status = node.find('wc-status')
        item = status.get('item')
        if item in ('external', 'deleted'):
            entry_type = item
        elif status.get('revision') is not None:
            # Anything else carrying a revision is treated as tracked
            entry_type = 'normal'
        else:
            entry_type = 'unversioned'
        results.append({'path': rel_path, 'type': entry_type})
    return results
248
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Run "svn info" on the given URL or working copy and return the dict
    built by parse_svn_info_xml(). If 'rev_number' is given, it is appended
    to the target as "@<rev_number>".
    """
    target = svn_url_or_wc
    if rev_number is not None:
        target = svn_url_or_wc + "@" + str(rev_number)
    info_xml = run_svn(svn_info_args + [target], fail_if_stderr=True)
    return parse_svn_info_xml(info_xml)
261
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Run "svn checkout" of 'svn_url' into 'checkout_dir', optionally at the
    given revision number. Returns the stdout of the svn command.
    """
    rev_args = []
    if rev_number is not None:
        rev_args = ['-r', rev_number]
    return run_svn(svn_checkout_args + rev_args + [svn_url, checkout_dir])
271
def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Run "svn log" and return up to 'limit' parsed log entries between the
    given revisions (see parse_svn_log_xml() for the entry format).

    Unless one of the revisions is 'HEAD', a "-r start:end" range is passed
    and, if the target has no "@" in it yet, the larger of the two revisions
    is appended to the target as "@<rev>".
    """
    options = []
    switches = (
        (stop_on_copy, '--stop-on-copy'),
        (get_changed_paths, '-v'),
        (get_revprops, '--with-all-revprops'),
    )
    for enabled, flag in switches:
        if enabled:
            options.append(flag)
    target = str(svn_url_or_wc)
    if not (rev_start == 'HEAD' or rev_end == 'HEAD'):
        options.extend(['-r', '%s:%s' % (rev_start, rev_end)])
        if "@" not in svn_url_or_wc:
            target = target + "@" + str(max(rev_start, rev_end))
    options.extend(['--limit', str(limit), target])
    log_xml = run_svn(svn_log_args + options)
    return parse_svn_log_xml(log_xml)
291
def get_svn_status(svn_wc, flags=None):
    """
    Run "svn status" on the given working-copy path (with one optional extra
    flag) and return the list built by parse_svn_status_xml(), with paths
    relative to that working-copy path.
    """
    # Canonicalize the path so that the prefix-stripping in
    # parse_svn_status_xml() works.
    wc_path = os.path.abspath(svn_wc)
    extra = []
    if flags:
        extra.append(flags)
    status_xml = run_svn(svn_status_args + extra + [wc_path])
    return parse_svn_status_xml(status_xml, wc_path)
304
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Return the first SVN log entry in the requested revision range.
    Reports an error via display_error() if the range has no log entries.
    """
    log_entries = run_svn_log(svn_url, rev_start, rev_end, 1,
                              stop_on_copy, get_changed_paths, get_revprops)
    if not log_entries:
        no_log_message = "No SVN log for %s between revisions %s and %s" % (
            svn_url, rev_start, rev_end)
        display_error(no_log_message)

    return log_entries[0]
315
def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the first log entry after/at the given revision number in an SVN branch.
    The log is fetched from 'rev_start' to 'rev_end' with --stop-on-copy, so
    with a low 'rev_start' this is meant to give you the log entry
    corresponding to the branch creation.

    'get_changed_paths' controls whether the changed paths are fetched
    ("svn log -v") and is forwarded to get_one_svn_log_entry().

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    # Forward the caller's choice instead of hard-coding True.
    return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
327
def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the last log entry before/at the given revision number in an SVN branch.
    The log is fetched backwards, from 'rev_end' down to 'rev_start', with
    --stop-on-copy, and the first entry of that reversed range is returned.

    'get_changed_paths' controls whether the changed paths are fetched
    ("svn log -v") and is forwarded to get_one_svn_log_entry().
    """
    # Note the swapped revisions: the range is walked newest-first.
    # Forward the caller's choice instead of hard-coding True.
    return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
335
336
# Request duration (in seconds) that iter_svn_log_entries() compares against
# when growing or shrinking its chunk length.
log_duration_threshold = 10.0
# Initial (and minimum) number of log entries requested per chunk.
log_min_chunk_length = 10
339
def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Generate the SVN log entries between first_rev and last_rev, in order.

    The log is fetched in chunks so that it isn't too nasty to the SVN server
    if many entries are requested. The chunk length doubles after a fast
    request and halves (down to log_min_chunk_length) after a slow one.
    """
    next_rev = first_rev
    batch_size = log_min_chunk_length
    span_factor = 1.0
    while last_rev == "HEAD" or next_rev <= last_rev:
        began = time.time()
        upper_rev = min(last_rev, next_rev + int(batch_size * span_factor))
        batch = run_svn_log(svn_url, next_rev, upper_rev, batch_size,
                            stop_on_copy, get_changed_paths, get_revprops)
        elapsed = time.time() - began
        if batch:
            for log_entry in batch:
                yield log_entry
            # Resume right after the last entry we handed out
            next_rev = batch[-1]['revision'] + 1
            # Adapt chunk length based on measured request duration
            if elapsed < log_duration_threshold:
                batch_size = int(batch_size * 2.0)
            elif elapsed > log_duration_threshold * 2:
                batch_size = max(log_min_chunk_length, int(batch_size / 2.0))
        elif upper_rev == last_rev:
            # Nothing found and the whole range has been covered
            break
        else:
            # Empty window: skip past it and widen the next one
            next_rev = upper_rev + 1
            span_factor *= 2.0
369
def commit_from_svn_log_entry(entry, files=None, keep_author=False):
    """
    Given an SVN log entry and an optional sequence of files, do an svn commit.

    The commit message is the original message with "Date: <original date>"
    appended. With 'keep_author', the original author is passed as
    "--username"; otherwise "Author: <original author>" is appended to the
    message too. Log entries without an author (entry['author'] is None) get
    neither "--username" nor an "Author:" line.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # This will use the local timezone for displaying commit times
    timestamp = int(entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    # Use this one instead if you prefer UTC commit times:
    #svn_date = "%d 0" % timestamp
    # Some revisions have no author; don't concatenate None or pass it to svn.
    author = entry['author']
    message = entry['message'] + "\nDate: " + svn_date
    if keep_author:
        options = ["ci", "--force-log", "-m", message]
        if author:
            options += ["--username", author]
    else:
        if author:
            message += "\nAuthor: " + author
        options = ["ci", "--force-log", "-m", message]
    if files:
        options += list(files)
    print("(Committing source rev #"+str(entry['revision'])+"...)")
    run_svn(options)
388
def in_svn(p):
    """
    Tell whether the given file/folder is tracked by Subversion.

    Looking for ".svn" directories only worked prior to SVN 1.6; with SVN 1.7
    and beyond, WC-NG keeps a single top-level ".svn" at the root of the
    working-copy. So ask "svn status" instead and check that the first status
    entry is of type 'normal'.
    """
    # TODO: Is there a better way to do this?
    status_entries = get_svn_status(p)
    return bool(status_entries) and status_entries[0]['type'] == 'normal'
402
def find_svn_ancestors(source_repos_url, source_base, source_offset, copyfrom_path, copyfrom_rev):
    """
    Given a copy-from path (copyfrom_path), walk the SVN history backwards to inspect
    the ancestry of that path. Build a collection of copyfrom_path+revision pairs
    for each of the branch-copies since the initial branch-creation. If we find a
    copy-from path which is source_base or lies below it (e.g. we crawled back to
    the initial branch-copy from trunk), then return the collection of ancestor paths.
    Otherwise, copyfrom_path has no ancestry compared to source_base.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    PARAMETERS:
    * source_repos_url = Full URL to root of repository, e.g. 'file:///path/to/repos'
    * source_base      = e.g. '/trunk'
    * source_offset    = e.g. 'projectA/file1.txt'
    * copyfrom_path    = e.g. '/branches/bug123/projectA/file1.txt'
    * copyfrom_rev     = revision number that copyfrom_path refers to

    RETURNS:
    A list of {'path': [base, offset], 'revision': rev} dicts, newest first
    (the given copy-from path comes first), when the ancestry chains back to
    source_base. A false value ([] or None) when it does not.
    """
    working_path = copyfrom_path
    working_base = copyfrom_path[:-len(source_offset)].rstrip('/')
    working_offset = source_offset.strip('/')
    working_rev = copyfrom_rev
    ancestors = [{'path': [working_base, working_offset], 'revision': working_rev}]
    while True:
        # Get the first "svn log" entry for this path (relative to @rev)
        if debug:
            print(">> find_svn_ancestors: " + source_repos_url + working_path + "@" + str(working_rev) +
                  " (" + working_base + " " + working_offset + ")")
        log_entry = get_first_svn_log_entry(source_repos_url + working_path + "@" + str(working_rev), 1, str(working_rev), True)
        if not log_entry:
            # Nothing to inspect, so stop here instead of dereferencing it.
            break
        # Find the action for our working_path in this revision
        for d in log_entry['changed_paths']:
            path = d['path']
            # Only consider working_path itself or one of its parent folders.
            # (A plain substring test would also match unrelated paths.)
            if working_path != path and not working_path.startswith(path + "/"):
                continue
            # Check action-type for this file
            action = d['action']
            if action not in ('M', 'A', 'R', 'D'):
                display_error("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                              % (log_entry['revision'], action))
            if debug:
                debug_desc = ": " + action + " " + path
                if d['copyfrom_path']:
                    debug_desc += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
                print(debug_desc)

            if action == 'R':
                # If file/folder was replaced, it has no ancestor
                return []
            if action == 'D':
                # If file/folder was deleted, it has no ancestor
                return []
            if action == 'A':
                # If file/folder was added but not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    return []
                # Else, file/folder was added and is a copy, so check ancestors
                path_old = d['copyfrom_path']
                # Swap the leading 'path' for the copy-from path. Only the prefix
                # is rewritten, not every occurrence of 'path' in the string.
                working_path = path_old + working_path[len(path):]
                if working_path == working_base or working_path.startswith(working_base + "/"):
                    # If the new and old working_path share the same working_base, just need to update working_offset.
                    working_offset = working_path[len(working_base)+1:]
                else:
                    # Else, assume that working_base has changed but working_offset is the same, e.g. a re-branch.
                    # TODO: Is this a safe assumption?!
                    working_base = working_path[:-len(working_offset)].rstrip('/')
                working_rev = d['copyfrom_revision']
                if debug:
                    print(">> find_svn_ancestors: copy-from: " + working_base + " " + working_offset + "@" + str(working_rev))
                ancestors.append({'path': [working_base, working_offset], 'revision': working_rev})
                # If we found a copy-from case which matches our source_base, we're done
                if (path_old == source_base) or (path_old.startswith(source_base + "/")):
                    return ancestors
                # Else, follow the copy and keep on searching
                break
        else:
            # No copy was followed in this log entry, so nothing changed and
            # another pass would re-read the same entry forever. Give up.
            break
    return None
483
def get_rev_map(rev_map, src_rev):
    """
    Find the equivalent rev # in the target repo for the given rev # from the source repo.

    'rev_map' maps source-repo rev #'s to target-repo rev #'s. The value for
    the highest mapped source rev less-than-or-equal-to 'src_rev' is returned.
    If there is no such entry, an error is reported via display_error().
    """
    # Find the highest entry less-than-or-equal-to src_rev: start at src_rev
    # itself and walk down to (and including) rev 1.
    for rev in range(src_rev, 0, -1):
        if debug:
            print(">> get_rev_map: rev="+str(rev)+" in_rev_map="+str(rev in rev_map))
        if rev in rev_map:
            return rev_map[rev]
    # Else, we fell off the bottom of the rev_map. Ruh-roh...
    display_error("Internal Error: get_rev_map: Unable to find match rev_map entry for src_rev=" + str(src_rev))
497
def replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url, rev_map):
    """
    Given an array of ancestor info (find_svn_ancestors), replay the history
    to correctly track renames ("svn copy/move") across branch-merges.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2   " Rename folder
    4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt   " Rename file inside renamed folder
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'ancestors' is the array returned by find_svn_ancestors() with the final
      destination info appended to it by process_svn_log_entry().
    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s.
    """
    # Base path used when displaying deferred deletions. It is kept apart from
    # the per-ancestor 'source_base' below, so that displaying a deletion
    # cannot change the source URL used for the log entries that follow.
    display_base = source_url[len(source_repos_url):]

    # Skip ancestors[0], which is the original (pre-branch-copy) trunk path,
    # and the last element, which is the final commit back to trunk.
    for idx in range(1, len(ancestors)-1):
        ancestor = ancestors[idx]
        source_base = ancestor['path'][0]
        source_offset = ancestor['path'][1]
        source_path = source_base + "/" + source_offset
        source_rev = ancestor['revision']
        source_rev_next = ancestors[idx+1]['revision']
        # Do a "svn log" on the _parent_ directory of source_path, since trying to get log info
        # for the "old path" on the revision where the copy/move happened will fail.
        if "/" in source_path:
            p_source_path = source_path[:source_path.rindex('/')]
        else:
            p_source_path = ""
        if debug:
            print(">> replay_svn_ancestors: ["+str(idx)+"]" + source_path+"@"+str(source_rev) + " ["+p_source_path+"@"+str(source_rev)+":"+str(source_rev_next-1)+"]")
        it_log_entries = iter_svn_log_entries(source_repos_url+p_source_path, source_rev, source_rev_next-1)
        for log_entry in it_log_entries:
            removed_paths = []
            process_svn_log_entry(log_entry, source_repos_url, source_repos_url+source_base, target_url,
                                  rev_map, removed_paths, [], True)
            # Process any deferred removed actions
            if removed_paths:
                for path_offset in removed_paths:
                    if svnlog_verbose:
                        print(" D " + display_base+"/"+path_offset)
                    run_svn(["remove", "--force", path_offset])
553
def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url, rev_map, removed_paths = None, commit_paths = None, is_ancestors_replay = False):
    """
    Process SVN changes from the given log entry.
    Returns array of all the paths in the working-copy that were changed,
    i.e. the paths which need to be "svn commit".

    'log_entry' is the array structure built by parse_svn_log_xml().
    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s.
    'removed_paths' is the working list of deferred deletions. It is updated
      in place; the caller is expected to run the actual "svn remove"s.
    'commit_paths' is the working list of specific paths which changes to pass
      to the final "svn commit". It is updated in place and returned.
    'is_ancestors_replay' is set when called from replay_svn_ancestors(); copies
      are then done inside the working copy when the copy-from path exists there.
    """
    # Use a fresh list per call when the caller passes none. A mutable default
    # would be shared (and keep growing) across calls.
    if removed_paths is None:
        removed_paths = []
    if commit_paths is None:
        commit_paths = []

    # Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
    source_base = source_url[len(source_repos_url):]
    if debug:
        print(">> process_svn_log_entry: " + source_url + " (" + source_base + ")")

    svn_rev = log_entry['revision']
    # Get current target revision, for "svn copy" support
    # (dup_rev is currently unused; the "svn info" call itself is kept as-is)
    dup_info = get_svn_info(target_url)
    dup_rev = dup_info['revision']

    unrelated_paths = []

    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. u'/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not path.startswith(source_base + "/"):
            # Ignore changed files that are not part of this subdir
            # (a change to source_base itself is skipped silently)
            if path != source_base:
                print(">> process_svn_log_entry: Unrelated path: " + path + "  (" + source_base + ")")
                unrelated_paths.append(path)
            continue
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. u'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path
        action = d['action']
        if action not in ('M', 'A', 'R', 'D'):
            display_error("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                          % (svn_rev, action))

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        # But, if we gather too many paths to commit, then the caller wipes
        # commit_paths and ends up doing a commit at the root of the working-copy.
        if len(commit_paths) < 100:
            commit_paths.append(path_offset)

        # Special-handling for replace's
        if action == 'R':
            if svnlog_verbose:
                msg = " " + action + " " + d['path']
                if d['copyfrom_path']:
                    msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
                print(msg)
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            if svnlog_verbose:
                msg = " " + action + " " + d['path']
                if d['copyfrom_path']:
                    msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
                print(msg)
            # If we have any queued deletions for this same path, remove those if we're re-adding this path.
            if path_offset in removed_paths:
                removed_paths.remove(path_offset)
            # Determine where to export from
            copyfrom_rev = svn_rev
            copyfrom_path = path
            svn_copy = False
            # Handle cases where this "add" was a copy from another URL in the source repos
            if d['copyfrom_revision']:
                copyfrom_rev = d['copyfrom_revision']
                copyfrom_path = d['copyfrom_path']
                if debug:
                    print(">> process_svn_log_entry: Check copy-from: " + source_base + " " + path_offset)
                # Path-prefix test rather than a substring test, so that e.g.
                # '/branches/x/trunk' is not mistaken for being inside '/trunk'.
                if copyfrom_path == source_base or copyfrom_path.startswith(source_base + "/"):
                    # The copy-from path is inside the current source_base, no need to check ancestry.
                    ancestors = []
                    copyfrom_offset = copyfrom_path[len(source_base):].strip("/")
                    if debug:
                        print(">> process_svn_log_entry: Found copy: " + copyfrom_path+"@"+str(copyfrom_rev))
                    svn_copy = True
                else:
                    # Check if the copy-from path has ancestors which chain back to the current source_base
                    ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset,
                                                   copyfrom_path, copyfrom_rev)
                    if ancestors:
                        # The copy-from path has ancestry back to source_base. Setup info
                        # for the latter replay_svn_ancestors() call, which will walk the
                        # ancestry from start to end, replaying any intermediate actions,
                        # e.g. handling file renames within a renamed parent folder.
                        # Reverse the list, so that we loop in chronological order
                        ancestors.reverse()
                        # Append the current revision
                        ancestors.append({'path': [source_base, path_offset], 'revision': svn_rev})
                        # ancestors[0] is the original (pre-branch-copy) trunk path.
                        # ancestors[1] is the first commit on the new branch.
                        copyfrom_rev = ancestors[0]['revision']
                        copyfrom_base = ancestors[0]['path'][0]
                        copyfrom_offset = ancestors[0]['path'][1]
                        copyfrom_path = copyfrom_base + "/" + copyfrom_offset
                        if debug:
                            print(">> process_svn_log_entry: FOUND PARENT:")
                            for idx, ancestor in enumerate(ancestors):
                                print("      ["+str(idx)+"] " + ancestor['path'][0]+" "+ancestor['path'][1]+"@"+str(ancestor['revision']))
                            print(">> process_svn_log_entry: copyfrom_path: " + copyfrom_path)
                        svn_copy = True
            # If this add was a copy-from, do a smart replay of the ancestors' history.
            if svn_copy:
                if debug:
                    print(">> process_svn_log_entry: svn_copy: copy-from: " + copyfrom_path+"@"+str(copyfrom_rev) + " source_base: "+source_base + " len(ancestors): " + str(len(ancestors)))
                if ancestors and d['kind'] == 'dir':
                    # Replay any actions which happened to this folder from the ancestor path(s).
                    replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url, rev_map)
                else:
                    # For files (non-folders), no need to replay_svn_ancestors, since there isn't any kind
                    # of "dependent" history we might need to replay like for folders.
                    if is_ancestors_replay and os.path.exists(copyfrom_offset):
                        # If we're replaying ancestry from a branch, try to do local working-copy
                        # copies first, because interim renames won't exist in target_url.
                        run_svn(["copy", copyfrom_offset, path_offset])
                    else:
                        # Copy this path from the equivalent path+rev in the target repo, to create the
                        # equivalent history.
                        tgt_rev = get_rev_map(rev_map, copyfrom_rev)
                        if debug:
                            print(">> get_rev_map: " + str(copyfrom_rev) + " (source) -> " + str(tgt_rev) + " (target)")
                        run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset])
                    if d['kind'] == 'file':
                        # Export the final file from the source repo, to make sure to get any modifications
                        # which might have happened as part of this source commit.
                        run_svn(["export", "--force", "-r", str(svn_rev),
                                 source_repos_url + path + "@" + str(svn_rev), path_offset])
            # Else just "svn export" the files from the source repo and "svn add" them.
            else:
                # Create (parent) directory if needed
                if d['kind'] == 'dir':
                    p_path = path_offset
                else:
                    p_path = os.path.dirname(path_offset).strip() or '.'
                if not os.path.exists(p_path):
                    os.makedirs(p_path)
                # Export the entire added tree.
                run_svn(["export", "--force", "-r", str(copyfrom_rev),
                         source_repos_url + copyfrom_path + "@" + str(copyfrom_rev), path_offset])
                if not in_svn(path_offset):
                    run_svn(["add", "--parents", path_offset])
                # TODO: Need to copy SVN properties from source repos

        elif action == 'D':
            # Queue "svn remove" commands, to allow the action == 'A' handling the opportunity
            # to do smart "svn copy" handling on copy/move/renames.
            if path_offset not in removed_paths:
                removed_paths.append(path_offset)

        elif action == 'M':
            if svnlog_verbose:
                print(" " + action + " " + d['path'])
            run_svn(["merge", "-c", str(svn_rev), "--non-recursive",
                     "--non-interactive", "--accept=theirs-full",
                     source_url+"/"+path_offset+"@"+str(svn_rev), path_offset])

        else:
            display_error("Internal Error: process_svn_log_entry: Unhandled 'action' value: '" + action + "'")

    if unrelated_paths:
        print("Unrelated paths: (vs. '" + source_base + "')")
        print("* %s" % (unrelated_paths,))

    return commit_paths
741
def pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url, target_url, rev_map, keep_author=False):
    """
    Pull SVN changes from the given log entry: replay its changed paths into
    the working copy, run the deferred removals, commit, and then tag the new
    HEAD revision with the "svn2svn:source_*" revprops.

    'log_entry' is the array structure built by parse_svn_log_xml().
    'source_repos_url' is the full URL to the root of the source repository.
    'source_repos_uuid' is stored in the "svn2svn:source_uuid" revprop.
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s.
    'keep_author' is passed on to commit_from_svn_log_entry().

    Returns nothing. A failed commit re-raises ExternalCommandFailed; nothing
    is rolled back here.
    """
    svn_rev = log_entry['revision']
    print("\n(Starting source rev #"+str(svn_rev)+":)")
    # Some revisions have no author (None); show them like "svn log" does.
    print("r"+str(log_entry['revision']) + " | " +
          (log_entry['author'] or "(no author)") + " | " +
          str(datetime.fromtimestamp(int(log_entry['date'])).isoformat(' ')))
    print(log_entry['message'])
    print("------------------------------------------------------------------------")

    # Process all the paths in this log entry
    removed_paths = []
    commit_paths = []
    process_svn_log_entry(log_entry, source_repos_url, source_url, target_url, rev_map, removed_paths, commit_paths)
    # Process any deferred removed actions
    if removed_paths:
        source_base = source_url[len(source_repos_url):]
        for path_offset in removed_paths:
            if svnlog_verbose:
                print(" D " + source_base+"/"+path_offset)
            run_svn(["remove", "--force", path_offset])

    # If we had too many individual paths to commit, wipe the list and just commit at
    # the root of the working copy.
    if len(commit_paths) > 99:
        commit_paths = []

    # TODO: Use SVN properties to track source URL + rev in the target repo?
    #       This would provide a more reliable resume-support
    try:
        commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
    except ExternalCommandFailed:
        # TODO: Try to resolve property conflicts on files and dirs, then retry
        #       the commit. Until that is re-worked, let the original failure
        #       propagate with its traceback intact.
        raise

    # Add source-tracking revprop's
    run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_uuid", source_repos_uuid])
    run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_url", source_url])
    run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_rev", svn_rev])
    print("(Finished source rev #"+str(svn_rev)+")")
822
823
def main():
    """
    Command-line entry point: replay history from a source SVN URL into a
    target SVN URL.

    Fresh run (default):
    * Warn (with a 5 second pause) if the target URL already exists.
    * Pick the starting source revision: the last revision at or before
      "-r REV" if given, otherwise the first revision of the source URL.
    * Check out the target into "_dup_wc", export the source tree at the
      starting revision into it, commit, and stamp the svn2svn:source_*
      revprops.

    Continue mode (-c, only honoured if "_dup_wc" already exists):
    * Re-use the existing working copy and resume after the revision given
      with "-r".

    In both modes every later source revision up to the source's current
    revision is then replayed with pull_svn_rev(), and 'rev_map' records
    source rev # -> target rev #. On Ctrl+C or any other failure the working
    copy is cleaned up and reverted; a final "svn up" always runs.
    """
    usage = "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
    parser = OptionParser(usage)
    parser.add_option("-a", "--keep-author", action="store_true",
                      dest="keep_author", help="Keep revision Author or not")
    parser.add_option("-c", "--continue-from-break", action="store_true",
                      dest="cont_from_break",
                      help="Continue from previous break")
    parser.add_option("-r", "--svn-rev", type="int", dest="svn_rev",
                      help="SVN revision to checkout from")
    (options, args) = parser.parse_args()
    if len(args) != 2:
        # NOTE(review): display_error(..., False) is presumably expected to
        # terminate the program -- confirm against its definition.
        display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
                      False)

    source_url = args.pop(0).rstrip("/")
    target_url = args.pop(0).rstrip("/")
    # optparse leaves the option as None when the flag is absent.
    keep_author = bool(options.keep_author)

    # Find the greatest_rev in the source repo
    svn_info = get_svn_info(source_url)
    greatest_rev = svn_info['revision']
    # Get the base URL for the source repos, e.g. u'svn://svn.example.com/svn/repo'
    source_repos_url = svn_info['repos_url']
    # Get the UUID for the source repos
    source_repos_uuid = svn_info['repos_uuid']

    dup_wc = "_dup_wc"
    rev_map = {}

    # if old working copy does not exist, disable continue mode
    # TODO: Better continue support. Maybe include source repo's rev # in target commit info?
    if not os.path.exists(dup_wc):
        options.cont_from_break = False

    if not options.cont_from_break:
        # Warn if Target SVN URL existed
        cmd = find_program("svn")
        pipe = Popen([cmd] + ["list"] + [target_url], executable=cmd,
                     stdout=PIPE, stderr=PIPE)
        out, _ = pipe.communicate()
        if pipe.returncode == 0:
            print("Target SVN URL: %s existed!" % target_url)
            if out:
                print(out)
            print("Press 'Enter' to Continue, 'Ctrl + C' to Cancel...")
            print("(Timeout in 5 seconds)")
            # Used purely as an interruptible 5 second pause; the result
            # of select() is not needed.
            select.select([sys.stdin], [], [], 5)

        # Get log entry for the SVN revision we will check out
        if options.svn_rev:
            # If specify a rev, get log entry just before or at rev
            svn_start_log = get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
        else:
            # Otherwise, get log entry of branch creation
            # TODO: This call is *very* expensive on a repo with lots of revisions.
            #       Even though the call is passing --limit 1, it seems like that limit-filter
            #       is happening after SVN has fetched the full log history.
            svn_start_log = get_first_svn_log_entry(source_url, 1, greatest_rev, False)

        # This is the revision we will start from for source_url
        svn_rev = svn_start_log['revision']

        # Check out a working copy of target_url
        dup_wc = os.path.abspath(dup_wc)
        if os.path.exists(dup_wc):
            shutil.rmtree(dup_wc)
        svn_checkout(target_url, dup_wc)
        os.chdir(dup_wc)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        paths = run_svn(["list", "-r", str(svn_rev), source_url + "@" + str(svn_rev)])
        paths = paths.strip("\n").split("\n")
        for path in paths:
            if not path:
                # Skip null lines. ("continue", not "break": a blank line must
                # not silently drop every entry listed after it.)
                continue
            # Directories have a trailing slash in the "svn list" output
            if path[-1] == "/":
                path = path.rstrip('/')
                if not os.path.exists(path):
                    os.makedirs(path)
            run_svn(["export", "--force", "-r", str(svn_rev), source_url + "/" + path + "@" + str(svn_rev), path])
            run_svn(["add", path])
        commit_from_svn_log_entry(svn_start_log, [], keep_author)
        # Add source-tracking revprop's. The revision is passed as a string,
        # like every other argument handed to run_svn().
        run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_uuid", source_repos_uuid])
        run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_url", source_url])
        run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_rev", str(svn_rev)])
    else:
        dup_wc = os.path.abspath(dup_wc)
        os.chdir(dup_wc)
        # TODO: Need better resume support. For the time being, expect caller explictly passes in resume revision.
        svn_rev = options.svn_rev
        # options.svn_rev is None when "-r" was not given; test that explicitly
        # instead of relying on how None compares against an integer.
        if svn_rev is None or svn_rev < 1:
            display_error("Invalid arguments\n\nNeed to pass result rev # (-r) when using continue-mode (-c)", False)

    # Load SVN log starting from svn_rev + 1
    it_log_entries = iter_svn_log_entries(source_url, svn_rev + 1, greatest_rev)

    try:
        for log_entry in it_log_entries:
            # Replay this revision from source_url into target_url
            pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url, target_url, rev_map, keep_author)
            # Update our target working-copy, to ensure everything says it's at the new HEAD revision
            run_svn(["up", dup_wc])
            # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
            dup_info = get_svn_info(target_url)
            dup_rev = dup_info['revision']
            svn_rev = log_entry['revision']
            rev_map[svn_rev] = dup_rev

    except KeyboardInterrupt:
        print("\nStopped by user.")
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
    except:
        # Deliberately broad: whatever went wrong, report it and leave the
        # working copy clean so a later run can resume from it.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
    finally:
        run_svn(["up"])
        print("\nFinished!")
952
953
# Run the replay only when this file is executed as a script (not on import).
if __name__ == "__main__":
    main()
956
957 # vim:sts=4:sw=4: