1 #!/usr/bin/env python
2 """
3 svn2svn.py
4
5 Replicate (replay) changesets from one SVN repository to another:
6 * Maintains full logical history (e.g. uses "svn copy" for renames).
7 * Maintains original commit messages.
8 * Cannot maintain the original commit date, but appends the original commit date
9 to each commit message: "Date: %d".
10 * Optionally maintain source author info. (Only supported if accessing
11 target SVN repo via file://)
12 * Optionally run an external shell script before each replayed commit
13 to give the ability to dynamically exclude or modify files as part
14 of the replay.
15
16 License: GPLv2, the same as hgsvn.
17 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
18 (This is a forked and modified version of http://code.google.com/p/svn2svn/)
19 """
20
21 import os
22 import sys
23 import time
24 import locale
25 import shutil
26 import select
27 import calendar
28 import traceback
29
30 from optparse import OptionParser
31 from subprocess import Popen, PIPE
32 from datetime import datetime
33 from operator import itemgetter
34
35 try:
36 from xml.etree import cElementTree as ET
37 except ImportError:
38 try:
39 from xml.etree import ElementTree as ET
40 except ImportError:
41 try:
42 import cElementTree as ET
43 except ImportError:
44 from elementtree import ElementTree as ET
45
46 svn_log_args = ['log', '--xml']
47 svn_info_args = ['info', '--xml']
48 svn_checkout_args = ['checkout', '-q']
49 svn_status_args = ['status', '--xml', '-v', '--ignore-externals']
50
51 # Setup debug options
52 debug = False
53 debug_runsvn_timing = False # Display how long each "svn" OS command took to run?
54 # Setup verbosity options
55 runsvn_showcmd = False # Display every "svn" OS command we run?
56 runsvn_showout = False # Display the stdout results from every "svn" OS command we run?
57 svnlog_verbose = True # Display each action + changed-path as we walk the history?
58
59 # define exception class
60 class ExternalCommandFailed(RuntimeError):
61 """
62 An external command failed.
63 """
64
65 def display_error(message, raise_exception = True):
66 """
67 Display error message, then terminate.
68 """
69 print "Error:", message
70 print
71 if raise_exception:
72 raise ExternalCommandFailed
73 else:
74 sys.exit(1)
75
76 # Windows compatibility code by Bill Baxter
77 if os.name == "nt":
78 def find_program(name):
79 """
80 Find the name of the program for Popen.
81 Windows is finicky about having the complete file name. Popen
82 won't search the %PATH% for you automatically.
83 (Adapted from ctypes.find_library)
84 """
85 # See MSDN for the REAL search order.
86 base, ext = os.path.splitext(name)
87 if ext:
88 exts = [ext]
89 else:
90 exts = ['.bat', '.exe']
91 for directory in os.environ['PATH'].split(os.pathsep):
92 for e in exts:
93 fname = os.path.join(directory, base + e)
94 if os.path.exists(fname):
95 return fname
96 return None
97 else:
98 def find_program(name):
99 """
100 Find the name of the program for Popen.
101 On Unix, popen isn't picky about having absolute paths.
102 """
103 return name
104
105 def shell_quote(s):
106 if runsvn_showcmd:
107 import re
108 p = re.compile('^[A-Za-z0-9=-]+$')
109 if p.match(s):
110 return s
111 if os.name == "nt":
112 q = '"'
113 else:
114 q = "'"
115 return q + s.replace('\\', '\\\\').replace("'", "'\"'\"'") + q
116
117 locale_encoding = locale.getpreferredencoding()
118
119 def run_svn(args, fail_if_stderr=False, encoding="utf-8"):
120 """
121 Run an svn command via a pipe and return its stdout;
122 raise an error (via display_error) if the command fails.
123 """
124 def _transform_arg(a):
125 if isinstance(a, unicode):
126 a = a.encode(encoding or locale_encoding)
127 elif not isinstance(a, str):
128 a = str(a)
129 return a
130 t_args = map(_transform_arg, args)
131
132 cmd = find_program("svn")
133 cmd_string = str(" ".join(map(shell_quote, [cmd] + t_args)))
134 if runsvn_showcmd:
135 # Default to bright-blue for svn commands that will take action on the working-copy.
136 color = "94"
137 # For status-only commands (or commands that aren't important to highlight), show in dim-blue.
138 status_cmds = ['status', 'st', 'log', 'info', 'list', 'propset', 'update', 'up', 'cleanup', 'revert']
139 if args[0] in status_cmds:
140 color = "34"
141 print "\x1b[34m"+"$"+"\x1b["+color+"m", cmd_string + "\x1b[0m"
142 if debug_runsvn_timing:
143 time1 = time.time()
144 pipe = Popen([cmd] + t_args, executable=cmd, stdout=PIPE, stderr=PIPE)
145 out, err = pipe.communicate()
146 if debug_runsvn_timing:
147 time2 = time.time()
148 print "(" + str(round(time2-time1,4)) + " elapsed)"
149 if out and runsvn_showout:
150 print out
151 if pipe.returncode != 0 or (fail_if_stderr and err.strip()):
152 display_error("External program failed (return code %d): %s\n%s"
153 % (pipe.returncode, cmd_string, err))
154 return out
155
156 def svn_date_to_timestamp(svn_date):
157 """
158 Parse an SVN date as read from the XML output and
159 return the corresponding timestamp.
160 """
161 # Strip microseconds and timezone (always UTC, hopefully)
162 # XXX there are various ISO datetime parsing routines out there,
163 # cf. http://seehuhn.de/comp/pdate
164 date = svn_date.split('.', 2)[0]
165 time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
166 return calendar.timegm(time_tuple)
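# Illustrative example (assuming the usual SVN XML date format; value invented):
#   svn_date_to_timestamp("2010-01-02T03:04:05.123456Z") strips the ".123456Z" suffix
#   and returns calendar.timegm(time.strptime("2010-01-02T03:04:05", "%Y-%m-%dT%H:%M:%S")).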
167
168 def parse_svn_info_xml(xml_string):
169 """
170 Parse the XML output from an "svn info" command and extract
171 useful information as a dict.
172 """
173 d = {}
174 tree = ET.fromstring(xml_string)
175 entry = tree.find('.//entry')
176 if entry:
177 d['url'] = entry.find('url').text
178 d['revision'] = int(entry.get('revision'))
179 d['repos_url'] = tree.find('.//repository/root').text
180 d['repos_uuid'] = tree.find('.//repository/uuid').text
181 d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
182 d['kind'] = entry.get('kind')
183 return d
184
185 def parse_svn_log_xml(xml_string):
186 """
187 Parse the XML output from an "svn log" command and extract
188 useful information as a list of dicts (one per log changeset).
189 """
190 l = []
191 tree = ET.fromstring(xml_string)
192 for entry in tree.findall('logentry'):
193 d = {}
194 d['revision'] = int(entry.get('revision'))
195 # Some revisions don't have authors, most notably
196 # the first revision in a repository.
197 author = entry.find('author')
198 d['author'] = author is not None and author.text or None
199 d['date'] = svn_date_to_timestamp(entry.find('date').text)
200 # Some revisions may have empty commit message
201 message = entry.find('msg')
202 message = message is not None and message.text is not None \
203 and message.text.strip() or ""
204 # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
205 d['message'] = message.replace('\r\n', '\n').replace('\n\r', '\n'). \
206 replace('\r', '\n')
207 revprops = []
208 for prop in entry.findall('.//revprops/property'):
209 revprops.append({ 'name': prop.get('name'), 'value': prop.text })
210 d['revprops'] = revprops
211 paths = []
212 for path in entry.findall('.//paths/path'):
213 copyfrom_rev = path.get('copyfrom-rev')
214 if copyfrom_rev:
215 copyfrom_rev = int(copyfrom_rev)
216 paths.append({
217 'path': path.text,
218 'kind': path.get('kind'),
219 'action': path.get('action'),
220 'copyfrom_path': path.get('copyfrom-path'),
221 'copyfrom_revision': copyfrom_rev,
222 })
223 # Need to sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
224 # can process actions in depth-first order.
225 d['changed_paths'] = sorted(paths, key=itemgetter('path'))
226 l.append(d)
227 return l
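# Sketch of the structure returned above (one dict per log entry; the field names match
# the parsing code, the values here are invented for illustration):
#   [{'revision': 123, 'author': 'jrandom', 'date': 1262401445, 'message': "...",
#     'revprops': [{'name': 'svn2svn:source_rev', 'value': '120'}],
#     'changed_paths': [{'path': '/trunk/projectA/file1.txt', 'kind': 'file',
#                        'action': 'A', 'copyfrom_path': None, 'copyfrom_revision': None}]}]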
228
229 def parse_svn_status_xml(xml_string, base_dir=None):
230 """
231 Parse the XML output from an "svn status" command and extract
232 useful info as a list of dicts (one per status entry).
233 """
234 l = []
235 tree = ET.fromstring(xml_string)
236 for entry in tree.findall('.//entry'):
237 d = {}
238 path = entry.get('path')
239 if base_dir is not None:
240 assert path.startswith(base_dir)
241 path = path[len(base_dir):].lstrip('/\\')
242 d['path'] = path
243 wc_status = entry.find('wc-status')
244 if wc_status.get('item') == 'external':
245 d['type'] = 'external'
246 elif wc_status.get('item') == 'deleted':
247 d['type'] = 'deleted'
248 elif wc_status.get('revision') is not None:
249 d['type'] = 'normal'
250 else:
251 d['type'] = 'unversioned'
252 l.append(d)
253 return l
254
255 def get_svn_info(svn_url_or_wc, rev_number=None):
256 """
257 Get SVN information for the given URL or working copy,
258 with an optionally specified revision number.
259 Returns a dict as created by parse_svn_info_xml().
260 """
261 if rev_number is not None:
262 args = [svn_url_or_wc + "@" + str(rev_number)]
263 else:
264 args = [svn_url_or_wc]
265 xml_string = run_svn(svn_info_args + args, fail_if_stderr=True)
266 return parse_svn_info_xml(xml_string)
267
268 def svn_checkout(svn_url, checkout_dir, rev_number=None):
269 """
270 Checkout the given URL at an optional revision number.
271 """
272 args = []
273 if rev_number is not None:
274 args += ['-r', rev_number]
275 args += [svn_url, checkout_dir]
276 return run_svn(svn_checkout_args + args)
277
278 def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
279 """
280 Fetch up to 'limit' SVN log entries between the given revisions.
281 """
282 args = []
283 if stop_on_copy:
284 args += ['--stop-on-copy']
285 if get_changed_paths:
286 args += ['-v']
287 if get_revprops:
288 args += ['--with-all-revprops']
289 url = str(svn_url_or_wc)
290 if rev_start != 'HEAD' and rev_end != 'HEAD':
291 args += ['-r', '%s:%s' % (rev_start, rev_end)]
292 if not "@" in svn_url_or_wc:
293 url += "@" + str(max(rev_start, rev_end))
294 args += ['--limit', str(limit), url]
295 xml_string = run_svn(svn_log_args + args)
296 return parse_svn_log_xml(xml_string)
297
298 def get_svn_status(svn_wc, flags=None):
299 """
300 Get SVN status information about the given working copy.
301 """
302 # Ensure proper stripping by canonicalizing the path
303 svn_wc = os.path.abspath(svn_wc)
304 args = []
305 if flags:
306 args += [flags]
307 args += [svn_wc]
308 xml_string = run_svn(svn_status_args + args)
309 return parse_svn_status_xml(xml_string, svn_wc)
310
311 def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
312 """
313 Get the first SVN log entry in the requested revision range.
314 """
315 entries = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths, get_revprops)
316 if not entries:
317 display_error("No SVN log for %s between revisions %s and %s" %
318 (svn_url, rev_start, rev_end))
319
320 return entries[0]
321
322 def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
323 """
324 Get the first log entry after/at the given revision number in an SVN branch.
325 By default the revision number is set to 0, which will give you the log
326 entry corresponding to the branch creation.
327
328 NOTE: to know whether the branch creation corresponds to an SVN import or
329 a copy from another branch, inspect elements of the 'changed_paths' entry
330 in the returned dictionary.
331 """
332 return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True, get_changed_paths=True)
333
334 def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
335 """
336 Get the last log entry before/at the given revision number in an SVN branch.
337 By default the revision number is set to HEAD, which will give you the log
338 entry corresponding to the latest commit in branch.
339 """
340 return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True, get_changed_paths=True)
341
342
343 log_duration_threshold = 10.0
344 log_min_chunk_length = 10
345
346 def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
347 """
348 Iterate over SVN log entries between first_rev and last_rev.
349
350 This function features chunked log fetching so that it isn't too nasty
351 to the SVN server if many entries are requested.
352 """
353 cur_rev = first_rev
354 chunk_length = log_min_chunk_length
355 chunk_interval_factor = 1.0
356 while last_rev == "HEAD" or cur_rev <= last_rev:
357 start_t = time.time()
358 stop_rev = min(last_rev, cur_rev + int(chunk_length * chunk_interval_factor))
359 entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length, stop_on_copy, get_changed_paths, get_revprops)
360 duration = time.time() - start_t
361 if not entries:
362 if stop_rev == last_rev:
363 break
364 cur_rev = stop_rev + 1
365 chunk_interval_factor *= 2.0
366 continue
367 for e in entries:
368 yield e
369 cur_rev = e['revision'] + 1
370 # Adapt chunk length based on measured request duration
371 if duration < log_duration_threshold:
372 chunk_length = int(chunk_length * 2.0)
373 elif duration > log_duration_threshold * 2:
374 chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
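# Typical usage sketch (hypothetical URL): lazily walk a revision range in chunks
# instead of issuing one huge "svn log" request:
#   for log_entry in iter_svn_log_entries('file:///path/to/repo/trunk', 1, 'HEAD'):
#       print log_entry['revision'], log_entry['message']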
375
376 def commit_from_svn_log_entry(entry, files=None, keep_author=False):
377 """
378 Given an SVN log entry and an optional sequence of files, do an svn commit.
379 """
380 # TODO: Run optional external shell hook here, for doing pre-commit filtering
381 # This will use the local timezone for displaying commit times
382 timestamp = int(entry['date'])
383 svn_date = str(datetime.fromtimestamp(timestamp))
384 # Uncomment this one if you prefer UTC commit times
385 #svn_date = "%d 0" % timestamp
386 if keep_author:
387 options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date, "--username", entry['author']]
388 else:
389 options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date + "\nAuthor: " + entry['author']]
390 if files:
391 options += list(files)
392 print "(Committing source rev #"+str(entry['revision'])+"...)"
393 run_svn(options)
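# Rough shape of the resulting command (a sketch; exact options depend on keep_author
# and on whether an explicit file list was passed):
#   svn ci --force-log -m "<original message> + Date:/Author: note" [--username <author>] [<files>...]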
394
395 def in_svn(p):
396 """
397 Check if a given file/folder is being tracked by Subversion.
398 Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
399 With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of the working-copy.
400 Use "svn status" to check the status of the file/folder.
401 """
402 # TODO: Is there a better way to do this?
403 entries = get_svn_status(p)
404 if not entries:
405 return False
406 d = entries[0]
407 return (d['type'] == 'normal')
408
409 def find_svn_ancestors(source_repos_url, source_url, path_base, path_offset, path_rev, \
410 copyfrom_path, copyfrom_rev, prefix = ""):
411 """
412 Given a final svn-add'd path (path_base+"/"+path_offset) and the original copy-from
413 path (copyfrom_path), walk the SVN history backwards to inspect the ancestry of
414 that path. Build a collection of copyfrom_path+revision pairs for each of the
415 branch-copies since the initial branch-creation. If we find a copyfrom_path that
416 source_url is a substring of (e.g. we crawled back to the initial branch-
417 copy from trunk), then return the collection of ancestor paths. Otherwise,
418 copyfrom_path has no ancestry relative to source_url.
419
420 This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
421 file/folder was renamed in a branch and then that branch was merged back to trunk.
422
423 'source_repos_url' is the full URL to the root of the source repository,
424 e.g. 'file:///path/to/repo'
425 'source_url' is the full URL to the source path in the source repository.
426 'path_base' is the base offset from source_repos_url that we ran "svn log" on,
427 e.g. '/trunk'. This is usually the same as the offset of source_url relative
428 to source_repos_url, but when replay_svn_ancestors() calls
429 process_svn_log_entry() our path_base might be a branch folder rather than
430 trunk.
431 'path_offset' is the offset from path_base to the file to check ancestry for,
432 e.g. 'projectA/file1.txt'. path = source_repos_url + path_base + path_offset.
433 'path_rev' is the revision ("svn log") that we're processing from the source repo.
434 'copyfrom_path' is the copy-from path, e.g. '/branches/bug123/projectA/file1.txt'
435 'copyfrom_rev' is the revision this copy-from path was copied at.
436 """
437
438 done = False
439 source_base = source_url[len(source_repos_url):]
440 working_path = copyfrom_path
441 working_rev = copyfrom_rev
442 ancestors_temp = [{'path': path_base+"/"+path_offset, 'revision': path_rev, 'copyfrom_path': copyfrom_path, 'copyfrom_rev': copyfrom_rev}]
443 while not done:
444 # Get the first "svn log" entry for this path (relative to @rev)
445 #working_path = working_base + "/" + working_offset
446 if debug:
447 print prefix+"\x1b[33m" + ">> find_svn_ancestors: " + source_repos_url + working_path+"@"+str(working_rev) + "\x1b[0m"
448 log_entry = get_first_svn_log_entry(source_repos_url + working_path+"@"+str(working_rev), 1, str(working_rev), True)
449 if not log_entry:
450 done = True
451 break
452 # Search for any actions on our target path (or parent paths).
453 changed_paths_temp = []
454 for d in log_entry['changed_paths']:
455 path = d['path']
456 if path in working_path:
457 changed_paths_temp.append({'path': path, 'data': d})
458 if not changed_paths_temp:
459 # If no matches, then we've hit the end of the chain and this path has no ancestry back to source_url.
460 done = True
461 continue
462 # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
463 changed_paths = sorted(changed_paths_temp, key=itemgetter('path'), reverse=True)
464 # Find the action for our working_path in this revision
465 for v in changed_paths:
466 d = v['data']
467 path = d['path']
468 # Check action-type for this file
469 action = d['action']
470 if action not in 'MARD':
471 display_error("In SVN rev. %d: action '%s' not supported. \
472 Please report a bug!" % (log_entry['revision'], action))
473 if debug:
474 debug_desc = "> " + action + " " + path
475 if d['copyfrom_path']:
476 debug_desc += " (from " + d['copyfrom_path']+"@"+str(d['copyfrom_revision']) + ")"
477 print prefix+"\x1b[33m" + debug_desc + "\x1b[0m"
478
479 if action == 'R':
480 # If file/folder was replaced, it has no ancestor
481 ancestors_temp = []
482 done = True
483 break
484 if action == 'D':
485 # If file/folder was deleted, it has no ancestor
486 ancestors_temp = []
487 done = True
488 break
489 if action == 'A':
490 # If file/folder was added but not a copy, it has no ancestor
491 if not d['copyfrom_path']:
492 ancestors_temp = []
493 done = True
494 break
495 # Else, file/folder was added and is a copy, so add an entry to our ancestors list
496 # and keep checking for ancestors
497 if debug:
498 print prefix+"\x1b[33m" + ">> find_svn_ancestors: Found copy-from: " + \
499 path + " --> " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + "\x1b[0m"
500 ancestors_temp.append({'path': path, 'revision': log_entry['revision'],
501 'copyfrom_path': d['copyfrom_path'], 'copyfrom_rev': d['copyfrom_revision']})
502 working_path = working_path.replace(d['path'], d['copyfrom_path'])
503 working_rev = d['copyfrom_revision']
504 # If we found a copy-from case which matches our source_base, we're done
505 if source_base in working_path:
506 done = True
507 break
508 # Else, follow the copy and keep on searching
509 break
510 ancestors = []
511 if ancestors_temp:
512 working_path = path_base+"/"+path_offset
513 for idx in range(0, len(ancestors_temp)):
514 d = ancestors_temp[idx]
515 working_path = working_path.replace(d['path'], d['copyfrom_path'])
516 working_rev = d['copyfrom_rev']
517 ancestors.append({'path': working_path, 'revision': working_rev})
518 if debug:
519 max_len = 0
520 for idx in range(len(ancestors)):
521 d = ancestors[idx]
522 max_len = max(max_len, len(d['path']+"@"+str(d['revision'])))
523 print prefix+"\x1b[93m" + ">> find_svn_ancestors: Found parent ancestors: " + "\x1b[0m"
524 for idx in range(len(ancestors)-1):
525 d = ancestors[idx]
526 d_next = ancestors[idx+1]
527 print prefix+"\x1b[33m" + " ["+str(idx)+"] " + str(d['path']+"@"+str(d['revision'])).ljust(max_len) + \
528 " <-- " + str(d_next['path']+"@"+str(d_next['revision'])).ljust(max_len) + "\x1b[0m"
529 return ancestors
530
531 def get_rev_map(rev_map, src_rev, prefix):
532 """
533 Find the equivalent rev # in the target repo for the given rev # from the source repo.
534 """
535
536 # Find the highest entry less-than-or-equal-to src_rev
537 for rev in range(src_rev, 1, -1):
538 if debug:
539 print prefix + "\x1b[32m" + ">> get_rev_map: rev="+str(rev)+" in_rev_map="+str(rev in rev_map) + "\x1b[0m"
540 if rev in rev_map:
541 return rev_map[rev]
542 # Else, we fell off the bottom of the rev_map. Ruh-roh...
543 display_error("Internal Error: get_rev_map: Unable to find match rev_map entry for src_rev=" + src_rev)
544
545 def get_svn_dirlist(svn_url, path_offset, svn_rev = ""):
546 # TODO: Rather than "svn ls"-ing the parent folder, should we just introduce an "ignore_error" param into run_svn()?
547 # Get path_offset's parent folder
548 p_path_offset = path_offset[:path_offset.rindex('/')] if '/' in path_offset else ""
549 # Get path_offset's leaf folder-name
550 p_path_sub = path_offset[len(p_path_offset)+1:]
551 #print "get_svn_dirlist: svn_url:"+svn_url+" path_offset:"+path_offset+" p_path_offset:"+p_path_offset+" p_path_sub:"+p_path_sub
552 args = ["list", "--recursive"]
553 if svn_rev:
554 args += ["-r", str(svn_rev)]
555 args += [(svn_url+"/"+p_path_offset if svn_url else p_path_offset)]
556 p_paths = run_svn(args)
557 p_paths = p_paths.strip("\n").split("\n") if len(p_paths)>1 else []
558 paths= []
559 if p_paths:
560 for path in p_paths:
561 #print "path:"+path+" p_path_sub:"+p_path_sub
562 if path.startswith(p_path_sub):
563 path_orig = path[len(p_path_sub)+1:]
564 if path_orig: paths.append(path_orig)
565 return paths
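# Hypothetical example: get_svn_dirlist(source_url, "projectA/subdir", 25) runs
# "svn list --recursive -r 25 <source_url>/projectA" and returns the entries found
# under "subdir", relative to it, e.g. ['file1.txt', 'nested/', 'nested/file2.txt'].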
566
567 def replay_svn_copyfrom(source_repos_url, source_url, path_base, path_offset, target_url, svn_rev, \
568 copyfrom_path, copyfrom_rev, rev_map, is_dir = False, prefix = ""):
569 source_base = source_url[len(source_repos_url):]
570 srcfrom_path = copyfrom_path
571 srcfrom_rev = copyfrom_rev
572 if debug:
573 print prefix + "\x1b[32m" + ">> replay_svn_copyfrom: Check copy-from: " + path_base+" "+path_offset + " --> " + copyfrom_path+"@"+str(copyfrom_rev) + "\x1b[0m"
574 if source_base in copyfrom_path:
575 # The copy-from path is inside source_base, no need to check ancestry.
576 if debug:
577 print prefix + "\x1b[32;1m" + ">> replay_svn_copyfrom: Check copy-from: Found copy (in source_base): " + copyfrom_path+"@"+str(copyfrom_rev) + "\x1b[0m"
578 else:
579 # Check if the copy-from path has ancestors which chain back to the current path_base
580 ancestors = find_svn_ancestors(source_repos_url, source_url,
581 path_base, path_offset, svn_rev,
582 copyfrom_path, copyfrom_rev, prefix+" ")
583 if ancestors:
584 # The copy-from path has ancestry back to source_url.
585 # ancestors[n] is the original (pre-branch-copy) trunk path.
586 # ancestors[n-1] is the first commit on the new branch.
587 copyfrom_path = ancestors[len(ancestors)-1]['path']
588 copyfrom_rev = ancestors[len(ancestors)-1]['revision']
589 if debug:
590 print prefix + "\x1b[32;1m" + ">> replay_svn_copyfrom: Check copy-from: Found parent: " + copyfrom_path+"@"+str(copyfrom_rev) + "\x1b[0m"
591 if not source_base in copyfrom_path:
592 # If this copy-from path has no ancestry back to source_url, then we can't do an "svn copy".
593 # Create (parent) directory if needed
594 p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or '.'
595 if not os.path.exists(p_path):
596 os.makedirs(p_path)
597 # Export the entire added tree.
598 run_svn(["export", "--force", "-r", str(copyfrom_rev),
599 source_repos_url + copyfrom_path + "@" + str(copyfrom_rev), path_offset])
600 if not in_svn(path_offset):
601 run_svn(["add", "--parents", path_offset])
602 # TODO: Need to copy SVN properties from source repos
603 else:
604 copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
605 if debug:
606 print prefix + "\x1b[32m" + ">> replay_svn_copyfrom: svn_copy: Copy-from: " + copyfrom_path+"@"+str(copyfrom_rev) + " path_base: "+path_base + "\x1b[0m"
607 # Copy this path from the equivalent path+rev in the target repo, to create the
608 # equivalent history.
609 tgt_rev = get_rev_map(rev_map, copyfrom_rev, prefix+" ")
610 if debug:
611 print prefix + "\x1b[32m" + ">> replay_svn_copyfrom: get_rev_map: " + str(copyfrom_rev) + " (source) -> " + str(tgt_rev) + " (target)" + "\x1b[0m"
612 run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset])
613 # Update the content in this fresh copy to match the final target revision.
614 if is_dir:
615 paths_remote = get_svn_dirlist(source_url, path_offset, svn_rev)
616 paths_local = get_svn_dirlist("", path_offset)
617 print prefix + "paths_local: " + str(paths_local)
618 print prefix + "paths_remote: " + str(paths_remote)
619 # Update files/folders which exist in remote but not local
620 for path in paths_remote:
621 if not path in paths_local:
622 path_is_dir = True if path[-1] == "/" else False
623 replay_svn_copyfrom(source_repos_url, source_url, path_base, path_offset+"/"+path,
624 target_url, svn_rev,
625 srcfrom_path+"/"+path, srcfrom_rev,
626 rev_map, path_is_dir, prefix+" ")
627 # Remove files/folders which exist in local but not remote
628 for path in paths_local:
629 if not path in paths_remote:
630 if svnlog_verbose:
631 print " D " + path_base+"/"+path_offset+"/"+path
632 run_svn(["remove", "--force", path_offset+"/"+path])
633 # TODO: Does this handle deleted folders too? Wouldn't want to have a case
634 # where we only delete all the files from a folder but leave an orphaned folder around.
635 else:
636 run_svn(["export", "--force", "-r", str(svn_rev),
637 source_repos_url+path_base+"/"+path_offset+"@"+str(svn_rev), path_offset])
638
639 def replay_svn_ancestors(ancestors, source_repos_url, source_url, source_offset, \
640 target_url, rev_map, prefix = ""):
641 """
642 Given an array of ancestor info (find_svn_ancestors), replay the history
643 to correctly track renames ("svn copy/move") across branch-merges.
644
645 For example, consider a sequence of events like this:
646 1. svn copy /trunk /branches/fix1
647 2. (Make some changes on /branches/fix1)
648 3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2   # Rename folder
649 4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt   # Rename file inside renamed folder
650 5. svn co /trunk && svn merge /branches/fix1
651 After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
652 and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
653 to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
654 we'd lose the logical history that Proj2/file2.txt is really a descendant
655 of Proj1/file1.txt.
656
657 'ancestors' is the array returned by find_svn_ancestors() with the final
658 destination info appended to it by process_svn_log_entry().
659 'source_repos_url' is the full URL to the root of the source repository.
660 'source_url' is the full URL to the source path in the source repository.
661 """
662
663 source_base = source_url[len(source_repos_url):]
664 for idx in range(1, len(ancestors)-1):
665 d = ancestors[idx]
666 working_path = d['path']
667 working_rev = d['revision']
668 working_rev_next = ancestors[idx+1]['revision']
669 # Do a "svn log" on the *parent* directory of working_path, since trying to get log info
670 # for the "old path" on the revision where the copy/move happened will fail.
671 p_working_path = working_path[:working_path.rindex('/')] if '/' in working_path else ""
672 if debug:
673 print prefix + "\x1b[35m" + ">> replay_svn_ancestors: ["+str(idx)+"]" + working_path+"@"+str(working_rev) + " ["+p_working_path+"@"+str(working_rev)+":"+str(working_rev_next-1)+"]" + "\x1b[0m"
674 it_log_entries = iter_svn_log_entries(source_repos_url+p_working_path, working_rev, working_rev_next-1)
675 for log_entry in it_log_entries:
676 #print prefix + ">> replay_svn_ancestors: log_entry: (" + source_repos_url+working_path + ")"
677 #print prefix + log_entry
678 removed_paths = []
679 process_svn_log_entry(log_entry, source_repos_url, source_url,
680 source_repos_url+working_path, source_offset,
681 target_url, rev_map, removed_paths, [], prefix+" ")
682 # Process any deferred removed actions
683 if removed_paths:
684 for path_offset in removed_paths:
685 if svnlog_verbose:
686 print prefix + " D " + source_base+"/"+path_offset
687 run_svn(["remove", "--force", path_offset])
688
689 def process_svn_log_entry(log_entry, source_repos_url, source_url, source_log_base_url, source_offset, \
690 target_url, rev_map, removed_paths = [], commit_paths = [], prefix = ""):
691 """
692 Process SVN changes from the given log entry.
693 Returns array of all the paths in the working-copy that were changed,
694 i.e. the paths which need to be "svn commit".
695
696 'log_entry' is the array structure built by parse_svn_log_xml().
697 'source_repos_url' is the full URL to the root of the source repository.
698 'source_url' is the full URL to the source path in the source repository.
699 'source_log_base_url' is the full URL to the source path in the source
700 repository that we ran the "svn log" command based on. Most of the time,
701 this should match source_url, but when called from replay_svn_ancestors()
702 this could be different, e.g. source_url is "/trunk" but
703 source_log_base_url is "/branches/fix1".
704 'target_url' is the full URL to the target path in the target repository.
705 'rev_map' is the running mapping-table dictionary for source-repo rev #'s
706 to the equivalent target-repo rev #'s.
707 'removed_paths' is the working list of deferred deletions.
708 'commit_paths' is the working list of specific changed paths to pass
709 to the final "svn commit".
710 """
711 # Get the relative offset of source_url and source_log_base_url based on source_repos_url
712 # e.g. '/branches/bug123'
713 source_base = source_url[len(source_repos_url):]
714 path_base = source_log_base_url[len(source_repos_url):]
715 if debug:
716 print prefix + "\x1b[32m" + ">> process_svn_log_entry: " + source_log_base_url + "@" + str(log_entry['revision']) + " (path_base:" + path_base + " source_offset:" + source_offset + ")" + "\x1b[0m"
717
718 svn_rev = log_entry['revision']
719
720 for d in log_entry['changed_paths']:
721 # Get the full path for this changed_path
722 # e.g. '/branches/bug123/projectA/file1.txt'
723 path = d['path']
724 if not path.startswith(path_base + "/"):
725 # Ignore changed files that are not part of this subdir
726 if path != path_base:
727 if debug:
728 print prefix + "\x1b[90m" + ">> process_svn_log_entry: Unrelated path: " + path + " (" + path_base + ")" + "\x1b[0m"
729 continue
730 # Calculate the offset (based on path_base) for this changed_path
731 # e.g. 'projectA/file1.txt'
732 # (path = path_base + "/" + path_offset)
733 # (source_path = source_base + "/" + source_offset + path_offset)
734 path_offset = path[len(path_base):].strip("/")
735 # Get the action for this path
736 action = d['action']
737 if action not in 'MARD':
738 display_error("In SVN rev. %d: action '%s' not supported. \
739 Please report a bug!" % (svn_rev, action))
740
741 # Try to be efficient and keep track of an explicit list of paths in the
742 # working copy that changed. If we commit from the root of the working copy,
743 # then SVN needs to crawl the entire working copy looking for pending changes.
744 # But, if we gather too many paths to commit, then we wipe commit_paths below
745 # and end-up doing a commit at the root of the working-copy.
746 if len (commit_paths) < 100:
747 commit_paths.append(path_offset)
748
749 # Special handling for replaces ('R')
750 if action == 'R':
751 if svnlog_verbose:
752 msg = " " + action + " " + d['path']
753 if d['copyfrom_path']:
754 msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
755 print prefix + msg
756 # If file was "replaced" (deleted then re-added, all in same revision),
757 # then we need to run the "svn rm" first, then change action='A'. This
758 # lets the normal code below handle re-"svn add"'ing the files. This
759 # should replicate the "replace".
760 run_svn(["remove", "--force", source_offset+path_offset])
761 action = 'A'
762
763 # Handle all the various action-types
764 # (Handle "add" first, for "svn copy/move" support)
765 if action == 'A':
766 if svnlog_verbose:
767 msg = " " + action + " " + d['path']
768 if d['copyfrom_path']:
769 msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
770 print prefix + msg
771 # If we have any queued deletions for this same path, remove those if we're re-adding this path.
772 if (source_offset+path_offset) in removed_paths:
773 removed_paths.remove(source_offset+path_offset)
774 # Determine where to export from.
775 copyfrom_path = path
776 copyfrom_rev = svn_rev
777 svn_copy = False
778 path_is_dir = True if d['kind'] == 'dir' else False
779 # Handle cases where this "add" was a copy from another URL in the source repos
780 if d['copyfrom_revision']:
781 copyfrom_path = d['copyfrom_path']
782 copyfrom_rev = d['copyfrom_revision']
783 replay_svn_copyfrom(source_repos_url, source_url, path_base, path_offset,
784 target_url, svn_rev,
785 copyfrom_path, copyfrom_rev,
786 rev_map, path_is_dir, prefix+" ")
787 # Else just "svn export" the files from the source repo and "svn add" them.
788 else:
789 # Create (parent) directory if needed
790 p_path = source_offset+path_offset if path_is_dir else os.path.dirname(source_offset+path_offset).strip() or '.'
791 if not os.path.exists(p_path):
792 os.makedirs(p_path)
793 # Export the entire added tree.
794 run_svn(["export", "--force", "-r", str(copyfrom_rev),
795 source_repos_url + copyfrom_path + "@" + str(copyfrom_rev), source_offset+path_offset])
796 if not in_svn(source_offset+path_offset):
797 run_svn(["add", "--parents", source_offset+path_offset])
798 # TODO: Need to copy SVN properties from source repos
799
800 elif action == 'D':
801 # Queue "svn remove" commands, to allow the action == 'A' handling the opportunity
802 # to do smart "svn copy" handling on copy/move/renames.
803 if not (source_offset+path_offset) in removed_paths:
804 removed_paths.append(source_offset+path_offset)
805
806 elif action == 'M':
807 if svnlog_verbose:
808 print prefix + " " + action + " " + d['path']
809 # TODO: Is "svn merge -c" correct here? Should this just be an "svn export" plus
810 # proplist updating?
811 out = run_svn(["merge", "-c", str(svn_rev), "--non-recursive",
812 "--non-interactive", "--accept=theirs-full",
813 source_url+"/"+path_offset+"@"+str(svn_rev), path_offset])
814
815 else:
816 display_error("Internal Error: process_svn_log_entry: Unhandled 'action' value: '" + action + "'")
817
818 return commit_paths
819
820 def pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url, target_url, rev_map, keep_author=False):
821 """
822 Pull SVN changes from the given log entry.
823 Returns the new SVN revision.
824 If an exception occurs, it will roll back to revision 'svn_rev - 1'.
825 """
826 svn_rev = log_entry['revision']
827 print "\n(Starting source rev #"+str(svn_rev)+":)"
828 print "r"+str(log_entry['revision']) + " | " + \
829 log_entry['author'] + " | " + \
830 str(datetime.fromtimestamp(int(log_entry['date'])).isoformat(' '))
831 print log_entry['message']
832 print "------------------------------------------------------------------------"
833
834 # Process all the paths in this log entry
835 removed_paths = []
836 commit_paths = []
837 process_svn_log_entry(log_entry, source_repos_url, source_url, source_url, "",
838 target_url, rev_map, removed_paths, commit_paths)
839 # Process any deferred removed actions
840 if removed_paths:
841 path_base = source_url[len(source_repos_url):]
842 for path_offset in removed_paths:
843 if svnlog_verbose:
844 print " D " + path_base+"/"+path_offset
845 run_svn(["remove", "--force", path_offset])
846
847 # If we had too many individual paths to commit, wipe the list and just commit at
848 # the root of the working copy.
849 if len (commit_paths) > 99:
850 commit_paths = []
851
852 try:
853 commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
854 except ExternalCommandFailed:
855 # try to ignore the Properties conflicts on files and dirs
856 # use the copy from original_wc
857 # TODO: Need to re-work this?
858 #has_Conflict = False
859 #for d in log_entry['changed_paths']:
860 # p = d['path']
861 # p = p[len(path_base):].strip("/")
862 # if os.path.isfile(p):
863 # if os.path.isfile(p + ".prej"):
864 # has_Conflict = True
865 # shutil.copy(original_wc + os.sep + p, p)
866 # p2=os.sep + p.replace('_', '__').replace('/', '_') \
867 # + ".prej-" + str(svn_rev)
868 # shutil.move(p + ".prej", os.path.dirname(original_wc) + p2)
869 # w="\n### Properties conflicts ignored:"
870 # print "%s %s, in revision: %s\n" % (w, p, svn_rev)
871 # elif os.path.isdir(p):
872 # if os.path.isfile(p + os.sep + "dir_conflicts.prej"):
873 # has_Conflict = True
874 # p2=os.sep + p.replace('_', '__').replace('/', '_') \
875 # + "_dir__conflicts.prej-" + str(svn_rev)
876 # shutil.move(p + os.sep + "dir_conflicts.prej",
877 # os.path.dirname(original_wc) + p2)
878 # w="\n### Properties conflicts ignored:"
879 # print "%s %s, in revision: %s\n" % (w, p, svn_rev)
880 # out = run_svn(["propget", "svn:ignore",
881 # original_wc + os.sep + p])
882 # if out:
883 # run_svn(["propset", "svn:ignore", out.strip(), p])
884 # out = run_svn(["propget", "svn:externel",
885 # original_wc + os.sep + p])
886 # if out:
887 # run_svn(["propset", "svn:external", out.strip(), p])
888 ## try again
889 #if has_Conflict:
890 # commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
891 #else:
892 raise ExternalCommandFailed
893
894 # Add source-tracking revprop's
895 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_uuid", source_repos_uuid])
896 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_url", source_url])
897 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_rev", svn_rev])
898 print "(Finished source rev #"+str(svn_rev)+")"
899
900
901 def main():
902 usage = "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
903 parser = OptionParser(usage)
904 parser.add_option("-a", "--keep-author", action="store_true",
905 dest="keep_author", help="Keep revision Author or not")
906 parser.add_option("-c", "--continue-from-break", action="store_true",
907 dest="cont_from_break",
908 help="Continue from previous break")
909 parser.add_option("-r", "--svn-rev", type="int", dest="svn_rev",
910 help="SVN revision to checkout from")
911 (options, args) = parser.parse_args()
912 if len(args) != 2:
913 display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
914 False)
915
916 source_url = args.pop(0).rstrip("/")
917 target_url = args.pop(0).rstrip("/")
918 if options.keep_author:
919 keep_author = True
920 else:
921 keep_author = False
922
923 # Find the greatest_rev in the source repo
924 svn_info = get_svn_info(source_url)
925 greatest_rev = svn_info['revision']
926 # Get the base URL for the source repos, e.g. 'svn://svn.example.com/svn/repo'
927 source_repos_url = svn_info['repos_url']
928 # Get the UUID for the source repos
929 source_repos_uuid = svn_info['repos_uuid']
930
931 dup_wc = "_dup_wc"
932 rev_map = {}
933
934 # if old working copy does not exist, disable continue mode
935 # TODO: Better continue support. Maybe include source repo's rev # in target commit info?
936 if not os.path.exists(dup_wc):
937 options.cont_from_break = False
938
939 if not options.cont_from_break:
940 # Warn if the target SVN URL already exists
941 cmd = find_program("svn")
942 pipe = Popen([cmd] + ["list"] + [target_url], executable=cmd,
943 stdout=PIPE, stderr=PIPE)
944 out, err = pipe.communicate()
945 if pipe.returncode == 0:
946 print "Target SVN URL: %s existed!" % target_url
947 if out:
948 print out
949 print "Press 'Enter' to Continue, 'Ctrl + C' to Cancel..."
950 print "(Timeout in 5 seconds)"
951 rfds, wfds, efds = select.select([sys.stdin], [], [], 5)
952
953 # Get log entry for the SVN revision we will check out
954 if options.svn_rev:
955 # If a rev was specified, get the log entry just before or at that rev
956 svn_start_log = get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
957 else:
958 # Otherwise, get log entry of branch creation
959 # TODO: This call is *very* expensive on a repo with lots of revisions.
960 # Even though the call is passing --limit 1, it seems like that limit-filter
961 # is happening after SVN has fetched the full log history.
962 svn_start_log = get_first_svn_log_entry(source_url, 1, greatest_rev, False)
963
964 # This is the revision we will start from for source_url
965 svn_rev = svn_start_log['revision']
966
967 # Check out a working copy of target_url
968 dup_wc = os.path.abspath(dup_wc)
969 if os.path.exists(dup_wc):
970 shutil.rmtree(dup_wc)
971 svn_checkout(target_url, dup_wc)
972 os.chdir(dup_wc)
973
974 # For the initial commit to the target URL, export all the contents from
975 # the source URL at the start-revision.
976 paths = run_svn(["list", "-r", str(svn_rev), source_url+"@"+str(svn_rev)])
977 if len(paths)>1:
978 paths = paths.strip("\n").split("\n")
979 for path in paths:
980 if not path:
981 # Skip null lines
982 continue
983 # Directories have a trailing slash in the "svn list" output
984 if path[-1] == "/":
985 path=path.rstrip('/')
986 if not os.path.exists(path):
987 os.makedirs(path)
988 run_svn(["export", "--force", "-r" , str(svn_rev), source_url+"/"+path+"@"+str(svn_rev), path])
989 run_svn(["add", path])
990 commit_from_svn_log_entry(svn_start_log, [], keep_author)
991 # Add source-tracking revprop's
992 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_uuid", source_repos_uuid])
993 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_url", source_url])
994 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_rev", svn_rev])
995 else:
996 dup_wc = os.path.abspath(dup_wc)
997 os.chdir(dup_wc)
998 # TODO: Need better resume support. For the time being, expect the caller to explicitly pass in the resume revision.
999 svn_rev = options.svn_rev
1000 if svn_rev < 1:
1001 display_error("Invalid arguments\n\nNeed to pass result rev # (-r) when using continue-mode (-c)", False)
1002
1003 # Load SVN log starting from svn_rev + 1
1004 it_log_entries = iter_svn_log_entries(source_url, svn_rev + 1, greatest_rev)
1005
1006 try:
1007 for log_entry in it_log_entries:
1008 # Replay this revision from source_url into target_url
1009 pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url, target_url, rev_map, keep_author)
1010 # Update our target working-copy, to ensure everything says it's at the new HEAD revision
1011 run_svn(["up", dup_wc])
1012 # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
1013 dup_info = get_svn_info(target_url)
1014 dup_rev = dup_info['revision']
1015 svn_rev = log_entry['revision']
1016 rev_map[svn_rev] = dup_rev
1017
1018 except KeyboardInterrupt:
1019 print "\nStopped by user."
1020 run_svn(["cleanup"])
1021 run_svn(["revert", "--recursive", "."])
1022 except:
1023 print "\nCommand failed with following error:\n"
1024 traceback.print_exc()
1025 run_svn(["cleanup"])
1026 run_svn(["revert", "--recursive", "."])
1027 finally:
1028 run_svn(["up"])
1029 print "\nFinished!"
1030
1031
1032 if __name__ == "__main__":
1033 main()
1034
1035 # vim:sts=4:sw=4: