]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn.py
Merge branch 'ancestors' into develop
[svn2svn.git] / svn2svn.py
1 #!/usr/bin/env python
2 """
3 svn2svn.py
4
5 Replicate (replay) changesets from one SVN repository to another:
6 * Maintains full logical history (e.g. uses "svn copy" for renames).
7 * Maintains original commit messages.
8 * Optionally maintain source author info. (Only supported if accessing
9 target SVN repo via file://)
10 * Cannot maintain original commit date, but appends original commit date
11 for each commit message: "Date: %d".
12 * Optionally run an external shell script before each replayed commit
13 to give the ability to dynamically exclude or modify files as part
14 of the replay.
15
16 License: GPLv2, the same as hgsvn.
17 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
18 (This is a forked and heavily modified version of http://code.google.com/p/svn2svn/)
19 """
20
21 import os
22 import sys
23 import time
24 import locale
25 import shutil
26 import select
27 import calendar
28 import traceback
29
30 from optparse import OptionParser,OptionGroup
31 from subprocess import Popen, PIPE
32 from datetime import datetime
33 from operator import itemgetter
34
35 try:
36 from xml.etree import cElementTree as ET
37 except ImportError:
38 try:
39 from xml.etree import ElementTree as ET
40 except ImportError:
41 try:
42 import cElementTree as ET
43 except ImportError:
44 from elementtree import ElementTree as ET
45
# Leading arguments for each "svn" sub-command this script invokes.
svn_log_args      = ['log', '--xml']
svn_info_args     = ['info', '--xml']
svn_checkout_args = ['checkout', '-q']
svn_status_args   = ['status', '--xml', '-v', '--ignore-externals']

# Debug switches.
# debug:         gates the ">> ..." trace output printed by the helpers below.
# runsvn_timing: display how long each "svn" OS command took to run?
debug = False
runsvn_timing = False

# Verbosity switches.
# runsvn_showcmd: display every "svn" OS command we run?
# runsvn_showout: display the stdout results from every "svn" OS command we run?
# svnlog_verbose: display each action + changed-path as we walk the history?
runsvn_showcmd = False
runsvn_showout = False
svnlog_verbose = False
58
59 # define exception class
class ExternalCommandFailed(RuntimeError):
    """
    Signals that an external command did not complete successfully.
    """
    pass
64
def display_error(message, raise_exception = True):
    """
    Print an error message to stdout, then abort.

    'message' is the text shown after the "Error:" prefix.
    'raise_exception' selects how to abort: when true (the default) raise
    ExternalCommandFailed carrying 'message'; otherwise terminate the
    process with exit status 1.
    """
    print("Error: %s" % (message,))
    print("")
    if raise_exception:
        # Attach the message so that whoever catches the exception can still
        # report what went wrong.
        raise ExternalCommandFailed(message)
    sys.exit(1)
75
76 # Windows compatibility code by Bill Baxter
if os.name == "nt":
    def find_program(name):
        """
        Resolve the full file name of a program so Popen can launch it.
        Windows is picky about having the complete file name, and Popen
        won't search the %PATH% for you automatically.
        (Adapted from ctypes.find_library)

        Returns the first existing candidate found on %PATH%, or None.
        """
        # See MSDN for the REAL search order.
        stem, suffix = os.path.splitext(name)
        # With no explicit extension, probe the usual launchable ones.
        candidates = [suffix] if suffix else ['.bat', '.exe']
        for folder in os.environ['PATH'].split(os.pathsep):
            for candidate in candidates:
                full_name = os.path.join(folder, stem + candidate)
                if os.path.exists(full_name):
                    return full_name
        return None
else:
    def find_program(name):
        """
        Resolve the name of a program for Popen.
        Outside of Windows the name is returned unchanged.
        """
        return name
104
def shell_quote(s):
    """
    Wrap 's' in quotes for display in a command-line string.

    When runsvn_showcmd is enabled, arguments made up solely of letters,
    digits, "=" and "-" are returned as-is. Everything else is wrapped in
    double-quotes on Windows and single-quotes elsewhere, with backslashes
    doubled and embedded single-quotes escaped.
    """
    if runsvn_showcmd:
        import re
        if re.compile('^[A-Za-z0-9=-]+$').match(s):
            return s
    quote_char = '"' if os.name == "nt" else "'"
    escaped = s.replace('\\', '\\\\').replace("'", "'\"'\"'")
    return quote_char + escaped + quote_char
116
# Fallback encoding for unicode arguments when run_svn() is given no encoding.
locale_encoding = locale.getpreferredencoding()

def run_svn(args, fail_if_stderr=False, ignore_retcode_err=False, encoding="utf-8"):
    """
    Run the "svn" client with the given arguments and return its stdout.

    'args' is the list of command-line arguments (sub-command first).
    'fail_if_stderr' treats any non-blank stderr output as a failure.
    'ignore_retcode_err' suppresses the failure on a non-zero exit code.
    'encoding' is used to encode unicode arguments (falls back to the
    locale's preferred encoding when empty).

    On failure, display_error() is called with the exit code, the command
    string and the captured stderr.
    """
    def _to_bytes(arg):
        # Popen wants plain strings: encode unicode, stringify anything else.
        if isinstance(arg, unicode):
            return arg.encode(encoding or locale_encoding)
        if isinstance(arg, str):
            return arg
        return str(arg)

    t_args = [_to_bytes(arg) for arg in args]
    cmd = find_program("svn")
    cmd_string = str(" ".join([shell_quote(part) for part in [cmd] + t_args]))
    if runsvn_showcmd:
        # For status-only commands (or commands that aren't important to highlight), show in dim-blue.
        status_cmds = ['status', 'st', 'log', 'info', 'list', 'propset', 'update', 'up', 'cleanup', 'revert']
        # Otherwise default to bright-blue for svn commands that will take action on the working-copy.
        color = "34" if args[0] in status_cmds else "94"
        print("\x1b[34m" + "$" + "\x1b[" + color + "m" + " " + cmd_string + "\x1b[0m")
    if runsvn_timing:
        time1 = time.time()
    pipe = Popen([cmd] + t_args, executable=cmd, stdout=PIPE, stderr=PIPE)
    out, err = pipe.communicate()
    if runsvn_timing:
        time2 = time.time()
        print("(" + str(round(time2-time1,4)) + " elapsed)")
    if out and runsvn_showout:
        print(out)
    bad_retcode = pipe.returncode != 0 and not ignore_retcode_err
    if bad_retcode or (fail_if_stderr and err.strip()):
        display_error("External program failed (return code %d): %s\n%s"
            % (pipe.returncode, cmd_string, err))
    return out
155
def svn_date_to_timestamp(svn_date):
    """
    Parse an SVN date as read from the XML output and
    return the corresponding timestamp (seconds since the epoch, UTC).

    Accepts dates both with and without a fractional-seconds part,
    e.g. "2011-12-25T10:30:00.123456Z" or "2011-12-25T10:30:00Z".
    """
    # Strip microseconds and timezone (always UTC, hopefully)
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    date = svn_date.strip().split('.', 1)[0]
    # Without a fractional part the "Z" suffix survives the split above.
    date = date.rstrip('Z')
    time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(time_tuple)
167
def parse_svn_info_xml(xml_string):
    """
    Parse the XML output from an "svn info" command and extract
    useful information as a dict.

    The returned dict has the keys 'url', 'revision', 'repos_url',
    'repos_uuid', 'last_changed_rev' and 'kind'. An empty dict is returned
    when the XML has no populated <entry> element.
    """
    d = {}
    tree = ET.fromstring(xml_string)
    entry = tree.find('.//entry')
    # Test explicitly instead of relying on Element truthiness, which is
    # deprecated: we need an entry that exists *and* has child elements.
    if entry is not None and len(entry) > 0:
        d['url'] = entry.find('url').text
        d['revision'] = int(entry.get('revision'))
        d['repos_url'] = tree.find('.//repository/root').text
        d['repos_uuid'] = tree.find('.//repository/uuid').text
        d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
        d['kind'] = entry.get('kind')
    return d
184
def parse_svn_log_xml(xml_string):
    """
    Turn the XML output of an "svn log" command into a list of dicts,
    one per <logentry>.

    Each dict carries 'revision', 'author', 'date', 'message', 'revprops'
    (list of name/value dicts) and 'changed_paths' (list of dicts sorted
    by path).
    """
    results = []
    root = ET.fromstring(xml_string)
    for logentry in root.findall('logentry'):
        record = {}
        record['revision'] = int(logentry.get('revision'))
        # Some revisions don't have authors, most notably
        # the first revision in a repository.
        author_node = logentry.find('author')
        if author_node is not None and author_node.text:
            record['author'] = author_node.text
        else:
            record['author'] = None
        record['date'] = svn_date_to_timestamp(logentry.find('date').text)
        # Some revisions may have empty commit message
        msg_node = logentry.find('msg')
        if msg_node is not None and msg_node.text is not None:
            text = msg_node.text.strip()
        else:
            text = ""
        # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
        for old in ('\r\n', '\n\r', '\r'):
            text = text.replace(old, '\n')
        record['message'] = text
        record['revprops'] = [
            {'name': prop.get('name'), 'value': prop.text}
            for prop in logentry.findall('.//revprops/property')
        ]
        changes = []
        for node in logentry.findall('.//paths/path'):
            from_rev = node.get('copyfrom-rev')
            if from_rev:
                from_rev = int(from_rev)
            changes.append({
                'path': node.text,
                'kind': node.get('kind'),
                'action': node.get('action'),
                'copyfrom_path': node.get('copyfrom-path'),
                'copyfrom_revision': from_rev,
            })
        # Need to sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
        # can process actions in depth-first order.
        changes.sort(key=itemgetter('path'))
        record['changed_paths'] = changes
        results.append(record)
    return results
228
def parse_svn_status_xml(xml_string, base_dir=None):
    """
    Turn the XML output of an "svn status" command into a list of dicts,
    one per status <entry>.

    When 'base_dir' is given, every entry path must start with it; the
    prefix (and any leading slashes) is stripped from the reported path.
    Each dict carries 'path', 'wc_status' (the raw wc-status attributes)
    and 'type', one of 'external', 'deleted', 'added', 'normal' or
    'unversioned'.
    """
    results = []
    for node in ET.fromstring(xml_string).findall('.//entry'):
        rel_path = node.get('path')
        if base_dir is not None:
            assert rel_path.startswith(base_dir)
            rel_path = rel_path[len(base_dir):].lstrip('/\\')
        status_node = node.find('wc-status')
        status = {}
        for attr in ('props', 'item', 'copied', 'revision'):
            status[attr] = status_node.get(attr)
        item = status['item']
        if item in ('external', 'deleted', 'added'):
            # These item states map straight onto the entry type.
            entry_type = item
        elif status['revision'] is not None or item == 'normal':
            entry_type = 'normal'
        else:
            entry_type = 'unversioned'
        results.append({'path': rel_path, 'wc_status': status, 'type': entry_type})
    return results
262
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Run "svn info" on the given URL or working copy and return the
    dict built by parse_svn_info_xml().

    When 'rev_number' is given, it is appended to the target as a
    peg revision ("target@rev").
    """
    target = svn_url_or_wc
    if rev_number is not None:
        target = target + "@" + str(rev_number)
    info_xml = run_svn(svn_info_args + [target], fail_if_stderr=True)
    return parse_svn_info_xml(info_xml)
275
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Run "svn checkout" of 'svn_url' into 'checkout_dir', optionally
    pinned to 'rev_number' via "-r". Returns the output of run_svn().
    """
    rev_args = [] if rev_number is None else ['-r', rev_number]
    return run_svn(svn_checkout_args + rev_args + [svn_url, checkout_dir])
285
def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Run "svn log" and return up to 'limit' parsed entries (see
    parse_svn_log_xml()) for the given revision range.

    'stop_on_copy' adds "--stop-on-copy", 'get_changed_paths' adds "-v"
    and 'get_revprops' adds "--with-all-revprops". The "-r" range is only
    passed when neither end of the range is 'HEAD'.
    """
    switches = (
        (stop_on_copy, '--stop-on-copy'),
        (get_changed_paths, '-v'),
        (get_revprops, '--with-all-revprops'),
    )
    args = [flag for enabled, flag in switches if enabled]
    url = str(svn_url_or_wc)
    if not (rev_start == 'HEAD' or rev_end == 'HEAD'):
        args.extend(['-r', '%s:%s' % (rev_start, rev_end)])
        # Pin the target with a peg revision unless the caller already did.
        if "@" not in svn_url_or_wc:
            url = url + "@" + str(max(rev_start, rev_end))
    args.extend(['--limit', str(limit), url])
    log_xml = run_svn(svn_log_args + args)
    return parse_svn_log_xml(log_xml)
305
def get_svn_status(svn_wc, flags=None):
    """
    Run "svn status" on the given working-copy path and return the
    list built by parse_svn_status_xml(), with paths relative to it.

    'flags', when given, is passed as one extra command-line argument.
    """
    # Ensure proper stripping by canonicalizing the path
    wc_path = os.path.abspath(svn_wc)
    extra_args = [flags] if flags else []
    status_xml = run_svn(svn_status_args + extra_args + [wc_path])
    return parse_svn_status_xml(status_xml, wc_path)
318
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Return the first SVN log entry found in the requested revision range.
    Reports an error via display_error() when the range has no entries.
    """
    found = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths, get_revprops)
    if len(found) == 0:
        display_error("No SVN log for %s between revisions %s and %s" %
            (svn_url, rev_start, rev_end))

    return found[0]
329
def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the first log entry in the range rev_start:rev_end of an SVN branch,
    without following the history across copies (--stop-on-copy).
    Passing the lowest revision as 'rev_start' gives you the log entry
    corresponding to the branch creation.

    'get_changed_paths' controls whether the changed-path details are
    fetched along with the entry.

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    # Honor the caller's get_changed_paths choice rather than forcing it on.
    return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
341
def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the last log entry in the range rev_start:rev_end of an SVN branch,
    without following the history across copies (--stop-on-copy). The range
    is walked in reverse, so passing 'HEAD' as 'rev_end' gives you the log
    entry corresponding to the latest commit in the branch.

    'get_changed_paths' controls whether the changed-path details are
    fetched along with the entry.
    """
    # Swap the range ends so that "svn log" reports the newest entry first,
    # and honor the caller's get_changed_paths choice rather than forcing it on.
    return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
349
350
# Request duration (in seconds) used to decide whether to grow or shrink
# the chunk size, and the smallest chunk size we will ever request.
log_duration_threshold = 10.0
log_min_chunk_length = 10

def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Iterate over SVN log entries between first_rev and last_rev.

    This function features chunked log fetching so that it isn't too nasty
    to the SVN server if many entries are requested.

    'last_rev' may be the string "HEAD", in which case the upper bound is
    open-ended. The remaining arguments are passed through to run_svn_log().
    Yields the entry dicts built by parse_svn_log_xml().
    """
    cur_rev = first_rev
    chunk_length = log_min_chunk_length
    chunk_interval_factor = 1.0
    # Handle "HEAD" explicitly rather than comparing a string with ints.
    open_ended = (last_rev == "HEAD")
    while open_ended or cur_rev <= last_rev:
        start_t = time.time()
        stop_rev = cur_rev + int(chunk_length * chunk_interval_factor)
        if not open_ended:
            stop_rev = min(last_rev, stop_rev)
        entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length, stop_on_copy, get_changed_paths, get_revprops)
        duration = time.time() - start_t
        if not entries:
            if stop_rev == last_rev:
                break
            # Nothing in this window: skip past it and widen the next one.
            cur_rev = stop_rev + 1
            chunk_interval_factor *= 2.0
            continue
        for e in entries:
            yield e
            cur_rev = e['revision'] + 1
        # Adapt chunk length based on measured request duration
        if duration < log_duration_threshold:
            chunk_length = int(chunk_length * 2.0)
        elif duration > log_duration_threshold * 2:
            chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
383
def commit_from_svn_log_entry(entry, files=None, keep_author=False, revprops=None):
    """
    Given an SVN log entry and an optional sequence of files, do an svn commit.

    'entry' is a log-entry dict as built by parse_svn_log_xml().
    'files' optionally restricts the commit to the given paths.
    'keep_author' passes the source author via "--username"; otherwise the
    author is appended to the commit message as an "Author:" line.
    'revprops' is an optional list of {'name': ..., 'value': ...} dicts,
    each passed via "--with-revprop".
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # This will use the local timezone for displaying commit times
    timestamp = int(entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    # Uncomment this one one if you prefer UTC commit times
    #svn_date = "%d 0" % timestamp
    message = entry['message'] + "\nDate: " + svn_date
    # Some revisions have no author (parse_svn_log_xml() reports None),
    # in which case there is nothing to pass along.
    author = entry['author']
    if keep_author:
        options = ["ci", "--force-log", "-m", message]
        if author is not None:
            options += ["--username", author]
    else:
        if author is not None:
            message += "\nAuthor: " + author
        options = ["ci", "--force-log", "-m", message]
    for r in (revprops or []):
        # An empty property has no text; don't commit the string "None".
        value = "" if r['value'] is None else str(r['value'])
        options += ["--with-revprop", r['name']+"="+value]
    if files:
        options += list(files)
    print("(Committing source rev #"+str(entry['revision'])+"...)")
    run_svn(options)
405
def in_svn(p, in_repo=False):
    """
    Report whether the given file/folder is tracked by Subversion.
    Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
    With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of the working-copy.
    So ask "svn status" about the path and inspect the first entry returned.

    With 'in_repo' set, locally-added paths (and paths with no revision)
    do not count as tracked.
    """
    status_entries = get_svn_status(p)
    if not status_entries:
        return False
    first = status_entries[0]
    entry_type = first['type']
    # If caller requires this path to be in the SVN repo, prevent returning True for locally-added paths.
    if in_repo:
        if entry_type == 'added' or first['wc_status']['revision'] is None:
            return False
    return entry_type in ('normal', 'added')
421
def _is_same_or_child_path(path, parent_path):
    """
    Return True if 'path' equals 'parent_path' or lies underneath it,
    comparing whole "/"-separated path components.
    """
    if path == parent_path:
        return True
    return path.startswith(parent_path.rstrip("/") + "/")

def _swap_path_prefix(path, old_prefix, new_prefix):
    """
    Return 'path' with its leading 'old_prefix' replaced by 'new_prefix'.
    'path' is expected to start with 'old_prefix'.
    """
    return new_prefix + path[len(old_prefix):]

def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix = ""):
    """
    Given a source path, walk the SVN history backwards to inspect the ancestry of
    that path, seeing if it traces back to base_path. Build an array of copyfrom_path
    and copyfrom_revision pairs for each of the "svn copies". If we find a copyfrom_path
    which starts with base_path (e.g. we crawled back to the initial branch-
    copy from trunk), then return the collection of ancestor paths. Otherwise,
    copyfrom_path has no ancestry compared to base_path.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'svn_repos_url' is the full URL to the root of the SVN repository,
      e.g. 'file:///path/to/repo'
    'base_path' is the path in the SVN repo to the target path we're trying to
      trace ancestry back to, e.g. 'trunk'.
    'source_path' is the path, relative to base_path, to start checking
      ancestry at, e.g. 'projectA/file1.txt'.
      (full_path = svn_repos_url+base_path+"/"+source_path)
    'source_rev' is the revision to start walking the history of source_path backwards from.
    'prefix' is prepended to every debug line printed.

    Returns a list of {'path': ..., 'revision': ...} dicts, starting with the
    source path itself and followed by one entry per copy that was followed.
    The list is empty when no ancestor chain was found.
    """
    if debug:
        print(prefix+"\x1b[33m" + ">> find_svn_ancestors: Start: ("+svn_repos_url+") source_path: "+source_path+"@"+str(source_rev)+" base_path: "+base_path + "\x1b[0m")
    done = False
    working_path = base_path+"/"+source_path
    working_rev = source_rev
    first_iter_done = False
    ancestors_temp = []
    while not done:
        # Get the first "svn log" entry for this path (relative to @rev)
        if debug:
            print(prefix+"\x1b[33m" + ">> find_svn_ancestors: " + svn_repos_url + working_path+"@"+str(working_rev) + "\x1b[0m")
        log_entry = get_first_svn_log_entry(svn_repos_url + working_path+"@"+str(working_rev), 1, str(working_rev), True)
        if not log_entry:
            if debug:
                print(prefix+"\x1b[33m" + ">> find_svn_ancestors: Done: no log_entry" + "\x1b[0m")
            done = True
            break
        # If we found a copy-from case which matches our base_path, we're done.
        # ...but only if we've at least tried to search for the first copy-from path.
        if first_iter_done and working_path.startswith(base_path):
            if debug:
                print(prefix+"\x1b[33m" + ">> find_svn_ancestors: Done: Found working_path.startswith(base_path) and first_iter_done=True" + "\x1b[0m")
            done = True
            break
        first_iter_done = True
        # Search for any actions on our target path (or parent paths).
        # Compare on path-component boundaries: a plain substring test would
        # wrongly treat e.g. "/trunk/a" as a parent of "/trunk/ab/file.txt".
        changed_paths_temp = []
        for d in log_entry['changed_paths']:
            path = d['path']
            if _is_same_or_child_path(working_path, path):
                changed_paths_temp.append({'path': path, 'data': d})
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the chain and this path has no ancestry back to base_path.
            if debug:
                print(prefix+"\x1b[33m" + ">> find_svn_ancestors: Done: No matching changed_paths" + "\x1b[0m")
            done = True
            break
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=itemgetter('path'), reverse=True)
        # Find the action for our working_path in this revision. Use a loop to check in reverse order,
        # so that if the target file/folder is "M" but has a parent folder with an "A" copy-from,
        # we still find and follow the parent's copy.
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            action = d['action']
            if action not in 'MARD':
                display_error("In SVN rev. %d: action '%s' not supported. Please report a bug!" % (log_entry['revision'], action))
            if debug:
                debug_desc = "> " + action + " " + path
                if d['copyfrom_path']:
                    debug_desc += " (from " + d['copyfrom_path']+"@"+str(d['copyfrom_revision']) + ")"
                print(prefix+"\x1b[33m" + debug_desc + "\x1b[0m")
            if action == 'D':
                # If file/folder was deleted, it has no ancestor
                ancestors_temp = []
                if debug:
                    print(prefix+"\x1b[33m" + ">> find_svn_ancestors: Done: deleted" + "\x1b[0m")
                done = True
                break
            if action in 'RA':
                # If file/folder was added/replaced but not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    ancestors_temp = []
                    if debug:
                        print(prefix+"\x1b[33m" + ">> find_svn_ancestors: Done: "+("Added" if action == "A" else "Replaced")+" with no copyfrom_path" + "\x1b[0m")
                    done = True
                    break
                # Else, file/folder was added/replaced and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                if debug:
                    print(prefix+"\x1b[33m" + ">> find_svn_ancestors: Found copy-from ("+action+"): " + \
                        path + " --> " + d['copyfrom_path']+"@"+str(d['copyfrom_revision']) + "\x1b[0m")
                ancestors_temp.append({'path': path, 'revision': log_entry['revision'],
                                       'copyfrom_path': d['copyfrom_path'], 'copyfrom_rev': d['copyfrom_revision']})
                # Only rewrite the leading part of the path; the copied path
                # text could also occur further down inside working_path.
                working_path = _swap_path_prefix(working_path, d['path'], d['copyfrom_path'])
                working_rev = d['copyfrom_revision']
                # Follow the copy and keep on searching
                break
    ancestors = []
    if ancestors_temp:
        ancestors.append({'path': base_path+"/"+source_path, 'revision': source_rev})
        # Replay the recorded copies from the source path, to build the full
        # path + revision of each ancestor along the chain.
        working_path = base_path+"/"+source_path
        for d in ancestors_temp:
            working_path = _swap_path_prefix(working_path, d['path'], d['copyfrom_path'])
            working_rev = d['copyfrom_rev']
            ancestors.append({'path': working_path, 'revision': working_rev})
        if debug:
            max_len = 0
            for d in ancestors:
                max_len = max(max_len, len(d['path']+"@"+str(d['revision'])))
            print(prefix+"\x1b[93m" + ">> find_svn_ancestors: Found parent ancestors: " + "\x1b[0m")
            for idx in range(len(ancestors)-1):
                d = ancestors[idx]
                d_next = ancestors[idx+1]
                print(prefix+"\x1b[33m" + " ["+str(idx)+"] " + str(d['path']+"@"+str(d['revision'])).ljust(max_len) + \
                    " <-- " + str(d_next['path']+"@"+str(d_next['revision'])).ljust(max_len) + "\x1b[0m")
    else:
        if debug:
            print(prefix+"\x1b[33m" + ">> find_svn_ancestors: No ancestor-chain found: " + svn_repos_url+base_path+"/"+source_path+"@"+(str(source_rev)) + "\x1b[0m")
    return ancestors
547
def get_rev_map(rev_map, src_rev, prefix):
    """
    Look up the target-repo rev # which corresponds to the given
    source-repo rev #.

    'rev_map' maps source rev #'s to target rev #'s. The value for the
    highest mapped rev # less-than-or-equal-to 'src_rev' is returned, or
    None when there is no such entry. 'prefix' is prepended to debug output.
    """
    if debug:
        print(prefix + "\x1b[32m" + ">> get_rev_map("+str(src_rev)+")" + "\x1b[0m")
    # Walk downwards from src_rev until we hit a mapped revision.
    for candidate in range(src_rev, 0, -1):
        is_mapped = candidate in rev_map
        if debug:
            print(prefix + "\x1b[32m" + ">> get_rev_map: rev="+str(candidate)+" in_rev_map="+str(is_mapped) + "\x1b[0m")
        if is_mapped:
            return rev_map[candidate]
    # Else, we fell off the bottom of the rev_map. Ruh-roh...
    return None
562
def get_svn_dirlist(svn_path, svn_rev = ""):
    """
    Return the child entries of the given folder path, as reported
    by "svn list" (one list item per output line).

    When 'svn_rev' is given it is used both for "-r" and as the peg
    revision. A non-zero exit code from svn is ignored, in which case
    the result is typically an empty list.
    """
    args = ["list"]
    target = svn_path
    if svn_rev:
        rev = str(svn_rev)
        args.extend(["-r", rev])
        target = target + "@" + rev
    args.append(target)
    listing = run_svn(args, False, True)
    if len(listing) > 1:
        return listing.strip("\n").split("\n")
    return []
576
577 def _add_export_path(export_paths, path_offset):
578 found = False
579 for p in export_paths:
580 if path_offset.startswith(p):
581 found = True
582 break
583 if not found:
584 export_paths.append(path_offset)
585 return export_paths
586
def do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev, \
               parent_copyfrom_path="", parent_copyfrom_rev="", export_paths=None, \
               rev_map=None, is_dir = False, prefix = ""):
    """
    Given the add'd source path, replay the "svn add/copy" commands to correctly
    track renames across copy-from's.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2  " Rename folder
    4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt  " Rename file inside renamed folder
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'path_offset' is the offset from source_base to the file to check ancestry for,
      e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'target_url' is the full URL to the target path in the target repository.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'parent_copyfrom_path' and 'parent_copyfrom_rev' is the copy-from path of the parent
      directory, when being called recursively by do_svn_add_dir().
    'export_paths' is the list of path_offset's that we've deferred running "svn export" on.
      It is updated in place; a fresh list is used when omitted.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s.
    'is_dir' is whether path_offset is a directory (rather than a file).
    'prefix' is prepended to every debug line printed.
    """
    # export_paths is used as a list (see _add_export_path()), so default to
    # a fresh list per call rather than a shared mutable default.
    if export_paths is None:
        export_paths = []
    if rev_map is None:
        rev_map = {}
    source_base = source_url[len(source_repos_url):]
    if debug:
        print(prefix + "\x1b[32m" + ">> do_svn_add: " + source_base+"/"+path_offset+"@"+str(source_rev) + \
            (" (parent-copyfrom: "+parent_copyfrom_path+"@"+str(parent_copyfrom_rev)+")" if parent_copyfrom_path else "") + "\x1b[0m")
    # Check if the given path has ancestors which chain back to the current source_base
    found_ancestor = False
    ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset, source_rev, prefix+" ")
    # ancestors[n] is the original (pre-branch-copy) trunk path.
    # ancestors[n-1] is the first commit on the new branch.
    copyfrom_path = ancestors[len(ancestors)-1]['path'] if ancestors else ""
    copyfrom_rev = ancestors[len(ancestors)-1]['revision'] if ancestors else ""
    if ancestors:
        # The copy-from path has ancestry back to source_url.
        if debug:
            print(prefix + "\x1b[32;1m" + ">> do_svn_add: Check copy-from: Found parent: " + copyfrom_path+"@"+str(copyfrom_rev) + "\x1b[0m")
        found_ancestor = True
        # Map the copyfrom_rev (source repo) to the equivalent target repo rev #. This can
        # return None in the case where copyfrom_rev is *before* our source_start_rev.
        tgt_rev = get_rev_map(rev_map, copyfrom_rev, prefix+" ")
        if debug:
            print(prefix + "\x1b[32m" + ">> do_svn_add: get_rev_map: " + str(copyfrom_rev) + " (source) -> " + str(tgt_rev) + " (target)" + "\x1b[0m")
    else:
        if debug:
            print(prefix + "\x1b[32;1m" + ">> do_svn_add: Check copy-from: No ancestor chain found." + "\x1b[0m")
        found_ancestor = False
    # NOTE: tgt_rev is only bound when ancestors were found; the short-circuit
    # on found_ancestor keeps it from being read otherwise.
    if found_ancestor and tgt_rev:
        # Check if this path_offset in the target WC already has this ancestry, in which
        # case there's no need to run the "svn copy" (again).
        path_in_svn = in_svn(path_offset)
        log_entry = get_last_svn_log_entry(path_offset, 1, 'HEAD', get_changed_paths=False) if in_svn(path_offset, True) else []
        if (not log_entry or (log_entry['revision'] != tgt_rev)):
            copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
            if debug:
                print(prefix + "\x1b[32m" + ">> do_svn_add: svn_copy: Copy-from: " + copyfrom_path+"@"+str(copyfrom_rev) + "\x1b[0m")
                print(prefix + "in_svn("+path_offset+") = " + str(path_in_svn))
                print(prefix + "copyfrom_path: "+copyfrom_path+" parent_copyfrom_path: "+parent_copyfrom_path)
                print(prefix + "copyfrom_rev: "+str(copyfrom_rev)+" parent_copyfrom_rev: "+str(parent_copyfrom_rev))
            if path_in_svn and \
               ((parent_copyfrom_path and copyfrom_path.startswith(parent_copyfrom_path)) and \
                (parent_copyfrom_rev and copyfrom_rev == parent_copyfrom_rev)):
                # When being called recursively, if this child entry has the same ancestor as
                # the parent, then no need to try to run another "svn copy".
                if debug:
                    print(prefix + "\x1b[32m" + ">> do_svn_add: svn_copy: Same ancestry as parent: " + parent_copyfrom_path+"@"+str(parent_copyfrom_rev) + "\x1b[0m")
            else:
                # Copy this path from the equivalent path+rev in the target repo, to create the
                # equivalent history.
                if parent_copyfrom_path and svnlog_verbose:
                    # If we have a parent copy-from path, we mis-match that so display a status
                    # message describing the action we're mimic'ing. If path_in_svn, then this
                    # is logically a "replace" rather than an "add".
                    print(" "+('R' if path_in_svn else 'A')+" "+source_base+"/"+path_offset+" (from "+ancestors[1]['path']+"@"+str(copyfrom_rev)+")")
                if path_in_svn:
                    # If local file is already under version-control, then this is a replace.
                    if debug:
                        print(prefix + "\x1b[32m" + ">> do_svn_add: pre-copy: local path already exists: " + path_offset + "\x1b[0m")
                    run_svn(["remove", "--force", path_offset])
                run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset])
                # Export the final version of this file/folder from the source repo, to make
                # sure we're up-to-date.
                export_paths = _add_export_path(export_paths, path_offset)
        else:
            print(prefix + "\x1b[32m" + ">> do_svn_add: Skipped 'svn copy': " + path_offset + "\x1b[0m")
    else:
        # Else, either this copy-from path has no ancestry back to source_url OR copyfrom_rev comes
        # before our initial source_start_rev (i.e. tgt_rev == None), so can't do a "svn copy".
        # Create (parent) directory if needed.
        # TODO: This is (nearly) a duplicate of code in process_svn_log_entry(). Should this be
        # split-out to a shared tag?
        p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or '.'
        if not os.path.exists(p_path):
            run_svn(["mkdir", p_path])
        if not in_svn(path_offset):
            if is_dir:
                # Export the final version of all files in this folder.
                export_paths = _add_export_path(export_paths, path_offset)
            else:
                # Export the final version of this file. We *need* to do this before running
                # the "svn add", even if we end-up re-exporting this file again via export_paths.
                run_svn(["export", "--force", "-r", str(source_rev),
                         source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])
            # If not already under version-control, then "svn add" this file/folder.
            run_svn(["add", "--parents", path_offset])
        # TODO: Need to copy SVN properties from source repos
    if is_dir:
        # For any folders that we process, process any child contents, so that we correctly
        # replay copies/replaces/etc.
        do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url,
                       copyfrom_path, copyfrom_rev, export_paths, rev_map, prefix+" ")
709
def do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url, \
                   parent_copyfrom_path, parent_copyfrom_rev, export_paths, rev_map, prefix=""):
    """
    Replay the contents of an add'd folder: run do_svn_add() for every
    child listed in the source repo for this folder, then "svn remove"
    any child which is listed locally but not in the source repo.

    The arguments have the same meaning as for do_svn_add(), with
    'path_offset' being the folder to process.
    """
    source_base = source_url[len(source_repos_url):]
    # Get the directory contents, to compare between the local WC (target_url) vs. the remote repo (source_url)
    # TODO: paths_local won't include add'd paths because "svn ls" lists the contents of the
    #       associated remote repo folder. (Is this a problem?)
    paths_local = get_svn_dirlist(path_offset)
    paths_remote = get_svn_dirlist(source_url+"/"+path_offset, source_rev)
    if debug:
        print(prefix + "\x1b[32m" + ">> do_svn_add_dir: paths_local: " + str(paths_local) + "\x1b[0m")
        print(prefix + "\x1b[32m" + ">> do_svn_add_dir: paths_remote: " + str(paths_remote) + "\x1b[0m")
    # Update files/folders which exist in remote but not local
    for child in paths_remote:
        # "svn list" marks folders with a trailing slash.
        child_is_dir = (child[-1] == "/")
        child_name = child.rstrip('/') if child_is_dir else child
        do_svn_add(source_repos_url, source_url, path_offset+"/"+child_name, target_url, source_rev,
                   parent_copyfrom_path, parent_copyfrom_rev, export_paths,
                   rev_map, child_is_dir, prefix+" ")
    # Remove files/folders which exist in local but not remote
    for child in paths_local:
        if child in paths_remote:
            continue
        if svnlog_verbose:
            print(" D " + source_base+"/"+path_offset+"/"+child)
        run_svn(["remove", "--force", path_offset+"/"+child])
        # TODO: Does this handle deleted folders too? Wouldn't want to have a case
        #       where we only delete all files from folder but leave orphaned folder around.
734 # TODO: Does this handle deleted folders too? Wouldn't want to have a case
735 # where we only delete all files from folder but leave orphaned folder around.
736
def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url,
                          rev_map, commit_paths=None, prefix=""):
    """
    Process SVN changes from the given log entry.
    Returns array of all the paths in the working-copy that were changed,
    i.e. the paths which need to be "svn commit".

    'log_entry' is the array structure built by parse_svn_log_xml().
    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s.
    'commit_paths' is the working list of specific paths which changes to pass
      to the final "svn commit". A list supplied by the caller is appended to
      in place (and is also the return value); when omitted, a fresh list is
      created for this call.
    'prefix' is prepended to the verbose/debug lines printed by this function.
    """
    # A "[]" default would be shared between calls; create the list per call
    # instead, while still mutating a caller-supplied list in place.
    if commit_paths is None:
        commit_paths = []
    removed_paths = []
    export_paths = []
    # Get the relative offset of source_url based on source_repos_url
    # e.g. '/branches/bug123'
    source_base = source_url[len(source_repos_url):]
    source_rev = log_entry['revision']
    if debug:
        print(prefix + "\x1b[32m" + ">> process_svn_log_entry: " + source_url + "@" + str(source_rev) + "\x1b[0m")
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not path.startswith(source_base + "/"):
            # Ignore changed files that are not part of this subdir
            if path != source_base:
                if debug:
                    print(prefix + "\x1b[90m" + ">> process_svn_log_entry: Unrelated path: " + path + " (" + source_base + ")" + "\x1b[0m")
            continue
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path
        action = d['action']
        # Exact membership test: a substring test against 'MARD' would also
        # accept '' or multi-letter values such as 'MA'.
        if action not in ('M', 'A', 'R', 'D'):
            display_error("In SVN rev. %d: action '%s' not supported. "
                          "Please report a bug!" % (source_rev, action))
        if svnlog_verbose and action != 'D':
            # (Note: Skip displaying action message for 'D' here since we'll display that
            #  message when we process the deferred delete actions at the end.)
            msg = " " + action + " " + d['path']
            if d['copyfrom_path']:
                msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
            print(prefix + msg)

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        # But, if we gather too many paths to commit, then the caller wipes
        # commit_paths and ends up doing a commit at the root of the working-copy.
        if len(commit_paths) < 100:
            commit_paths.append(path_offset)

        # Special-handling for replace's
        if action == 'R':
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            # If we have any queued deletions for this same path, remove those if we're re-adding this path.
            if path_offset in removed_paths:
                removed_paths.remove(path_offset)
            path_is_dir = (d['kind'] == 'dir')
            if d['copyfrom_revision']:
                # This "add" was a copy from another URL in the source repos.
                # The two empty strings fill do_svn_add's parent copy-from
                # arguments: at this level there is no enclosing copied folder.
                # NOTE(review): presumably do_svn_add works out the copy-from
                # source for path_offset itself -- confirm against do_svn_add.
                do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev,
                           "", "", export_paths, rev_map, path_is_dir, prefix + " ")
            else:
                # Else just "svn export" the files from the source repo and "svn add" them.
                # Create (parent) directory if needed
                if path_is_dir:
                    p_path = path_offset
                else:
                    p_path = os.path.dirname(path_offset).strip() or '.'
                if not os.path.exists(p_path):
                    run_svn(["mkdir", p_path])
                if path_is_dir:
                    # Export the entire added tree (done once, after all paths are processed).
                    export_paths = _add_export_path(export_paths, path_offset)
                else:
                    # Export the final version of this file. We *need* to do this before running
                    # the "svn add", even if we end-up re-exporting this file again via export_paths.
                    run_svn(["export", "--force", "-r", str(source_rev),
                             source_repos_url + source_base + "/" + path_offset + "@" + str(source_rev), path_offset])
                # TODO: Do we need the in_svn check here?
                #if not in_svn(path_offset):
                run_svn(["add", "--parents", path_offset])
                # TODO: Need to copy SVN properties from source repos

        elif action == 'D':
            # Queue "svn remove" commands, to allow the action == 'A' handling the opportunity
            # to do smart "svn copy" handling on copy/move/renames.
            if path_offset not in removed_paths:
                removed_paths.append(path_offset)

        elif action == 'M':
            # TODO: Is "svn merge -c" correct here? Should this just be an "svn export" plus
            #       proplist updating?
            run_svn(["merge", "-c", str(source_rev), "--non-recursive",
                     "--non-interactive", "--accept=theirs-full",
                     source_url + "/" + path_offset + "@" + str(source_rev), path_offset])

        else:
            display_error("Internal Error: process_svn_log_entry: Unhandled 'action' value: '" + action + "'")

    # Process any deferred removed actions
    for path_offset in removed_paths:
        if svnlog_verbose:
            print(" D " + source_base + "/" + path_offset)
        run_svn(["remove", "--force", path_offset])
    # Export the final version of all add'd paths from source_url
    for path_offset in export_paths:
        run_svn(["export", "--force", "-r", str(source_rev),
                 source_repos_url + source_base + "/" + path_offset + "@" + str(source_rev), path_offset])

    return commit_paths
869
def disp_svn_log_summary(log_entry):
    """
    Print a short "svn log"-style banner for 'log_entry': a "starting" marker,
    a "r<rev> | <author> | <date>" line, the commit message and a separator.

    'log_entry' must provide 'revision', 'author', 'date' (a value accepted
    by int(), interpreted as a timestamp) and 'message'.
    """
    revision_text = str(log_entry['revision'])
    print("\n(Starting source rev #" + revision_text + ":)")
    # Render the timestamp in local time as "YYYY-MM-DD HH:MM:SS".
    commit_time = datetime.fromtimestamp(int(log_entry['date']))
    header = "r" + revision_text + " | " + log_entry['author'] + " | " + str(commit_time.isoformat(' '))
    print(header)
    print(log_entry['message'])
    print("------------------------------------------------------------------------")
877
def pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url, target_url, rev_map, keep_author=False):
    """
    Replay one source revision: print its summary, apply its changed paths to
    the working copy via process_svn_log_entry(), then commit the result via
    commit_from_svn_log_entry() with source-tracking revprops attached.

    'log_entry' is the log entry for the source revision being replayed.
    'source_repos_uuid', 'source_url' and the entry's revision are recorded
      as the 'source_uuid', 'source_url' and 'source_rev' revprops.
    'keep_author' is forwarded to commit_from_svn_log_entry().

    This function does not return a value and performs no rollback itself.
    """
    disp_svn_log_summary(log_entry)
    source_rev = log_entry['revision']

    # Collect the working-copy paths touched by this revision; the list is
    # filled in place by process_svn_log_entry().
    touched_paths = []
    process_svn_log_entry(log_entry, source_repos_url, source_url, target_url,
                          rev_map, touched_paths)
    # Too many individual paths: pass an empty list so the commit is done at
    # the root of the working copy instead.
    if len(touched_paths) >= 100:
        touched_paths = []

    # Source-tracking revprop's
    tracking_values = (('source_uuid', source_repos_uuid),
                       ('source_url', source_url),
                       ('source_rev', source_rev))
    tracking_revprops = [{'name': prop_name, 'value': prop_value}
                         for prop_name, prop_value in tracking_values]
    commit_from_svn_log_entry(log_entry, touched_paths, keep_author=keep_author,
                              revprops=tracking_revprops)
    print("(Finished source rev #" + str(source_rev) + ")")
902
def _rollback_working_copy():
    """Clean up and revert all pending changes in the current working copy."""
    run_svn(["cleanup"])
    run_svn(["revert", "--recursive", "."])
    # TODO: Run "svn status" and pro-actively delete any "?" orphaned entries, to clean-up the WC?


def main():
    """
    Command-line entry point: replay the history of source_url into target_url.

    Steps:
    * Parse the options and the two positional arguments (source_url, target_url).
    * Unless continuing (-c) with an existing "_wc_target" working copy: check
      out target_url into "_wc_target", export the top-level contents of
      source_url at the start revision, "svn add" them and commit that as the
      initial import.
    * In continue mode: re-use the existing working copy; the caller must pass
      the revision to resume after via -r.
    * Replay every later source revision through pull_svn_rev(), recording
      source rev -> target rev in rev_map after each one.
    * On Ctrl-C or any other failure, clean up and revert the working copy.
    """
    # Declared up-front: these module-level flags are assigned further down.
    global debug, runsvn_showcmd, svnlog_verbose

    usage = "Usage: %prog [-a] [-c] [-r SVN rev] source_url target_url"
    parser = OptionParser(usage)
    parser.add_option("-r", "--revision", type="int", dest="svn_rev", metavar="REV",
                      help="initial SVN revision to checkout from")
    parser.add_option("-a", "--keep-author", action="store_true", dest="keep_author",
                      help="maintain original Author info from source repo")
    parser.add_option("-c", "--continue", action="store_true", dest="cont_from_break",
                      help="continue from previous break")
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                      help="show 'svn status'-style messages for each action replayed [default]")
    parser.add_option("-q", "--quiet", action="store_false", dest="verbose",
                      help="show only minimal status/progress messages")
    parser.set_defaults(verbose=True)
    group = OptionGroup(parser, "Debug Options")
    group.add_option("--debug", action="store_true", dest="debug_all",
                     help="enable all debugging options")
    group.add_option("--debug-showcmds", action="store_true", dest="debug_showcmds",
                     help="display each SVN command being executed")
    group.add_option("--debug-debugmsgs", action="store_true", dest="debug_debugmsgs",
                     help="display debug messages")
    parser.add_option_group(group)
    (options, args) = parser.parse_args()
    if len(args) != 2:
        display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
                      False)

    source_url = args[0].rstrip("/")
    target_url = args[1].rstrip("/")
    keep_author = bool(options.keep_author)

    # Find the greatest_rev in the source repo
    svn_info = get_svn_info(source_url)
    greatest_rev = svn_info['revision']
    # Get the base URL for the source repos, e.g. 'svn://svn.example.com/svn/repo'
    source_repos_url = svn_info['repos_url']
    # Get the UUID for the source repos
    source_repos_uuid = svn_info['repos_uuid']

    wc_target = "_wc_target"
    rev_map = {}

    if options.debug_debugmsgs:
        debug = True
    if options.debug_showcmds:
        runsvn_showcmd = True
    if options.debug_all:
        debug = True
        runsvn_showcmd = True
    if options.verbose:
        svnlog_verbose = True

    # if old working copy does not exist, disable continue mode
    # TODO: Better continue support. Maybe include source repo's rev # in target commit info?
    if not os.path.exists(wc_target):
        options.cont_from_break = False

    if not options.cont_from_break:
        # Warn if Target SVN URL existed
        cmd = find_program("svn")
        pipe = Popen([cmd, "list", target_url], executable=cmd,
                     stdout=PIPE, stderr=PIPE)
        out = pipe.communicate()[0]
        if pipe.returncode == 0:
            print("Target SVN URL: %s existed!" % target_url)
            if out:
                print(out)
            print("Press 'Enter' to Continue, 'Ctrl + C' to Cancel...")
            print("(Timeout in 5 seconds)")
            # Only used as an interruptible 5-second pause; the result is irrelevant.
            select.select([sys.stdin], [], [], 5)

        # Get log entry for the SVN revision we will check out
        if options.svn_rev:
            # If specify a rev, get log entry just before or at rev
            svn_start_log = get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
        else:
            # Otherwise, get log entry of branch creation
            # TODO: This call is *very* expensive on a repo with lots of revisions.
            #       Even though the call is passing --limit 1, it seems like that limit-filter
            #       is happening after SVN has fetched the full log history.
            svn_start_log = get_first_svn_log_entry(source_url, 1, greatest_rev, False)

        # This is the revision we will start from for source_url
        source_start_rev = svn_start_log['revision']

        # Check out a working copy of target_url
        wc_target = os.path.abspath(wc_target)
        if os.path.exists(wc_target):
            shutil.rmtree(wc_target)
        svn_checkout(target_url, wc_target)
        os.chdir(wc_target)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        paths = run_svn(["list", "-r", str(source_start_rev), source_url + "@" + str(source_start_rev)])
        if len(paths) > 1:
            disp_svn_log_summary(get_one_svn_log_entry(source_url, source_start_rev, source_start_rev))
            print("(Initial import)")
            for path in paths.strip("\n").split("\n"):
                # For each top-level file/folder...
                if not path:
                    # Skip null lines (a "break" here would silently drop
                    # every entry listed after the blank line).
                    continue
                # Directories have a trailing slash in the "svn list" output
                if path[-1] == "/":
                    path = path.rstrip('/')
                    if not os.path.exists(path):
                        os.makedirs(path)
                run_svn(["export", "--force", "-r", str(source_start_rev),
                         source_url + "/" + path + "@" + str(source_start_rev), path])
                run_svn(["add", path])
            revprops = [{'name': 'source_uuid', 'value': source_repos_uuid},
                        {'name': 'source_url', 'value': source_url},
                        {'name': 'source_rev', 'value': source_start_rev}]
            commit_from_svn_log_entry(svn_start_log, [], keep_author=keep_author, revprops=revprops)
            print("(Finished source rev #" + str(source_start_rev) + ")")
    else:
        wc_target = os.path.abspath(wc_target)
        os.chdir(wc_target)
        # TODO: Need better resume support. For the time being, expect caller explictly passes in resume revision.
        source_start_rev = options.svn_rev
        # -r is optional for the parser, so it may be None here; check that
        # explicitly rather than relying on how None compares against an int.
        if source_start_rev is None or source_start_rev < 1:
            display_error("Invalid arguments\n\nNeed to pass result rev # (-r) when using continue-mode (-c)", False)

    # Load SVN log starting from source_start_rev + 1
    it_log_entries = iter_svn_log_entries(source_url, source_start_rev + 1, greatest_rev)

    try:
        for log_entry in it_log_entries:
            # Replay this revision from source_url into target_url
            pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url,
                         target_url, rev_map, keep_author)
            # Update our target working-copy, to ensure everything says it's at the new HEAD revision
            run_svn(["up"])
            # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
            dup_info = get_svn_info(target_url)
            dup_rev = dup_info['revision']
            source_rev = log_entry['revision']
            if debug:
                print("\x1b[32m" + (">> main: rev_map.add: source_rev=%s target_rev=%s" % (source_rev, dup_rev)) + "\x1b[0m")
            rev_map[source_rev] = dup_rev

    except KeyboardInterrupt:
        print("\nStopped by user.")
        _rollback_working_copy()
    except:
        # Deliberately broad: whatever aborted the replay, report it and leave
        # the working copy without half-applied changes.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        _rollback_working_copy()
    finally:
        run_svn(["up"])
        print("\nFinished!")
1064
1065
# Script entry point: run the replay only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
1068
1069 # vim:sts=4:sw=4: