]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn.py
Adding in_svn() for SVN 1.7 support
[svn2svn.git] / svn2svn.py
1 #!/usr/bin/env python
2 """
3 svn2svn.py
4
5 Replicate changesets from one SVN repository to another,
6 includes diffs, comments, and Dates of each revision.
7 It's also possible to retain the Author info if the Target SVN URL
8 is in a local filesystem (ie, running svn2svn.py on Target SVN server),
9 or if Target SVN URL is managed through ssh tunnel.
10 In the latter case, please run 'ssh-add' (which adds RSA or DSA identities to
11 the authentication agent) before invoking svn2svn.py.
12
13 For example (in Unix environment):
14 $ exec /usr/bin/ssh-agent $SHELL
15 $ /usr/bin/ssh-add
16 Enter passphrase for /home/user/.ssh/id_dsa:
17 Identity added: /home/user/.ssh/id_dsa (/home/user/.ssh/id_dsa)
18 $ python ./svn2svn.py -a SOURCE TARGET
19
20 Written and used on Ubuntu 7.04 (Feisty Fawn).
21 Provided as-is and absolutely no warranty - aka Don't bet your life on it.
22
23 This tool re-used some modules from svnclient.py on project hgsvn
24 (a tool can create Mercurial repository from SVN repository):
25 http://cheeseshop.python.org/pypi/hgsvn
26
27 License: GPLv2, the same as hgsvn.
28
29 version 0.1.1; Jul 31, 2007; simford dot dong at gmail dot com
30 """
31
32 import os
33 import sys
34 import time
35 import locale
36 import shutil
37 import select
38 import calendar
39 import traceback
40
41 from optparse import OptionParser
42 from subprocess import Popen, PIPE
43 from datetime import datetime
44
45 try:
46 from xml.etree import cElementTree as ET
47 except ImportError:
48 try:
49 from xml.etree import ElementTree as ET
50 except ImportError:
51 try:
52 import cElementTree as ET
53 except ImportError:
54 from elementtree import ElementTree as ET
55
# Base argument lists for the svn subcommands used in this file.  Callers
# build the final command line as "<base list> + <their own arguments>".
svn_log_args = ["log", "--xml", "-v"]
svn_info_args = ["info", "--xml"]
svn_checkout_args = ["checkout", "-q"]
svn_status_args = ["status", "--xml", "-v", "--ignore-externals"]
60
# Exception raised by display_error() when an operation must be aborted.
class ExternalCommandFailed(RuntimeError):
    """
    Signals that an external command (such as svn) failed.
    """
    pass
66
class ParameterError(RuntimeError):
    """
    An invalid parameter was supplied.

    NOTE(review): this class is not raised anywhere in the visible part of
    this file; the description is inferred from its name -- confirm intent.
    """
71
def display_error(message, raise_exception = True):
    """
    Print an error message, then abort.

    message         -- text printed after the "Error:" prefix.
    raise_exception -- when true (the default), raise ExternalCommandFailed
                       carrying the message so the caller may handle it;
                       otherwise terminate the process with exit status 1.
    """
    print("Error: %s" % (message,))
    print("")
    if raise_exception:
        # Attach the message so tracebacks and handlers can see what failed.
        raise ExternalCommandFailed(message)
    sys.exit(1)
82
# Windows compatibility code by Bill Baxter
def find_program(name):
    """
    Find the name of the program for Popen.

    On non-Windows systems the name is returned unchanged.  On Windows
    (os.name == "nt") every directory listed in %PATH% is probed and the
    first existing file is returned, or None when nothing matches.  A name
    without an extension is tried with '.bat' and then '.exe'.
    (Adapted from ctypes.find_library)
    """
    if os.name != "nt":
        return name
    # See MSDN for the REAL search order.
    stem, suffix = os.path.splitext(name)
    candidates = [suffix] if suffix else ['.bat', '.exe']
    for folder in os.environ['PATH'].split(os.pathsep):
        for candidate_ext in candidates:
            full_name = os.path.join(folder, stem + candidate_ext)
            if os.path.exists(full_name):
                return full_name
    return None
111
def shell_quote(s):
    """
    Quote one argument for display in a shell-style command line.

    On POSIX the argument is wrapped in single quotes; an embedded single
    quote is written as '"'"' (close quote, double-quoted quote, reopen).
    Backslashes are left alone because they are literal inside single
    quotes.  On Windows the argument is wrapped in double quotes and
    embedded double quotes are backslash-escaped.
    """
    if os.name == "nt":
        return '"' + s.replace('"', '\\"') + '"'
    return "'" + s.replace("'", "'\"'\"'") + "'"
118
# Fallback encoding for svn arguments when run_svn() is given no encoding.
locale_encoding = locale.getpreferredencoding()
120
def run_svn(args, fail_if_stderr=False, encoding="utf-8"):
    """
    Run the svn client with the given argument list and return its stdout.

    The command line is echoed (prefixed with "*") before it runs.
    display_error() is called when svn exits with a non-zero status, or
    when 'fail_if_stderr' is set and svn wrote something to stderr.
    """
    def _as_str(arg):
        # Unicode is encoded; other non-str values (e.g. int revision
        # numbers) are stringified.
        if isinstance(arg, unicode):
            return arg.encode(encoding or locale_encoding)
        if isinstance(arg, str):
            return arg
        return str(arg)

    cmd = find_program("svn")
    full_cmd = [cmd] + [_as_str(a) for a in args]
    cmd_string = str(" ".join([shell_quote(part) for part in full_cmd]))
    print("* " + cmd_string)
    proc = Popen(full_cmd, executable=cmd, stdout=PIPE, stderr=PIPE)
    out, err = proc.communicate()
    failed = proc.returncode != 0
    if not failed and fail_if_stderr:
        failed = bool(err.strip())
    if failed:
        display_error("External program failed (return code %d): %s\n%s"
                      % (proc.returncode, cmd_string, err))
    return out
143
def svn_date_to_timestamp(svn_date):
    """
    Parse an SVN date as read from the XML output and
    return the corresponding timestamp (seconds since the epoch).

    The date is interpreted as UTC.  Fractional seconds are discarded.
    """
    # Cut at the first '.' to drop the fractional part and whatever follows
    # it; also strip a trailing 'Z' so that a date written without a
    # fractional part ("...T12:34:56Z") still parses.
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    date = svn_date.strip().split('.', 1)[0].rstrip('Z')
    time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(time_tuple)
155
def parse_svn_info_xml(xml_string):
    """
    Parse the XML output from an "svn info" command and extract
    useful information as a dict.

    Keys set when an <entry> element is present: 'url', 'revision',
    'repos_url', 'last_changed_rev' and 'kind'.  An empty dict is returned
    when the document contains no <entry>.
    """
    d = {}
    tree = ET.fromstring(xml_string)
    entry = tree.find('.//entry')
    # Test against None explicitly: the truth value of an Element depends on
    # whether it has children, not on whether find() matched something.
    if entry is not None:
        d['url'] = entry.find('url').text
        d['revision'] = int(entry.get('revision'))
        d['repos_url'] = tree.find('.//repository/root').text
        d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
        d['kind'] = entry.get('kind')
    return d
171
def parse_svn_log_xml(xml_string):
    """
    Turn the XML printed by "svn log --xml -v" into a list of dicts, one
    per <logentry>, with the keys 'revision', 'author', 'date', 'message'
    and 'changed_paths'.
    """
    entries = []
    root = ET.fromstring(xml_string)
    for node in root.findall('logentry'):
        revision = int(node.get('revision'))

        # Some revisions don't have authors, most notably
        # the first revision in a repository.
        author_node = node.find('author')
        if author_node is not None and author_node.text:
            author = author_node.text
        else:
            author = None

        date = svn_date_to_timestamp(node.find('date').text)

        # Some revisions may have empty commit message
        msg_node = node.find('msg')
        message = ""
        if msg_node is not None and msg_node.text is not None:
            message = msg_node.text.strip() or ""
        # Normalise DOS ('\r\n') and MacOS ('\r') line ends to unix '\n'
        for line_end in ('\r\n', '\n\r', '\r'):
            message = message.replace(line_end, '\n')

        changed_paths = []
        for path_node in node.findall('.//path'):
            source_rev = path_node.get('copyfrom-rev')
            if source_rev:
                source_rev = int(source_rev)
            changed_paths.append({
                'path': path_node.text,
                'action': path_node.get('action'),
                'copyfrom_path': path_node.get('copyfrom-path'),
                'copyfrom_revision': source_rev,
            })

        entries.append({
            'revision': revision,
            'author': author,
            'date': date,
            'message': message,
            'changed_paths': changed_paths,
        })
    return entries
207
def parse_svn_status_xml(xml_string, base_dir=None):
    """
    Turn the XML printed by "svn status --xml" into a list of dicts, one
    per <entry>, each holding 'path' and 'type' ('external', 'normal' or
    'unversioned').  When 'base_dir' is given, every path must start with
    it and is returned relative to it.
    """
    def _classify(status_node):
        if status_node.get('item') == 'external':
            return 'external'
        if status_node.get('revision') is not None:
            return 'normal'
        return 'unversioned'

    results = []
    for node in ET.fromstring(xml_string).findall('.//entry'):
        rel_path = node.get('path')
        if base_dir is not None:
            assert rel_path.startswith(base_dir)
            rel_path = rel_path[len(base_dir):].lstrip('/\\')
        results.append({
            'path': rel_path,
            'type': _classify(node.find('wc-status')),
        })
    return results
231
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Run "svn info --xml" on a URL or working copy and return the dict
    built by parse_svn_info_xml().  When 'rev_number' is given it is
    appended to the target as "@<rev_number>".
    """
    target = svn_url_or_wc
    if rev_number is not None:
        target = target + "@" + str(rev_number)
    xml_string = run_svn(svn_info_args + [target], fail_if_stderr=True)
    return parse_svn_info_xml(xml_string)
245
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Check out 'svn_url' into 'checkout_dir', at revision 'rev_number'
    when one is given.  Returns the output of the svn command.
    """
    rev_args = ['-r', rev_number] if rev_number is not None else []
    return run_svn(svn_checkout_args + rev_args + [svn_url, checkout_dir])
255
def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False):
    """
    Run "svn log" and return at most 'limit' parsed entries.

    The "-r rev_start:rev_end" range is only passed when neither bound is
    the string 'HEAD'.
    """
    cmd_args = list(svn_log_args)
    if stop_on_copy:
        cmd_args.append('--stop-on-copy')
    if not (rev_start == 'HEAD' or rev_end == 'HEAD'):
        cmd_args.extend(['-r', '%s:%s' % (rev_start, rev_end)])
    cmd_args.extend(['--limit', str(limit), svn_url_or_wc])
    return parse_svn_log_xml(run_svn(cmd_args))
269
def get_svn_status(svn_wc, flags=None):
    """
    Run "svn status" on a working copy path and return the parsed entries,
    with paths relative to that working copy.  'flags', when given, is
    passed to svn as one extra argument.
    """
    # The path is made absolute so the prefix can be stripped reliably.
    wc_path = os.path.abspath(svn_wc)
    extra = [flags] if flags else []
    xml_string = run_svn(svn_status_args + extra + [wc_path])
    return parse_svn_status_xml(xml_string, wc_path)
282
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False):
    """
    Return the first log entry svn reports for the requested revision
    range.  display_error() is called when the range has no entries.
    """
    found = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy)
    if not found:
        display_error("No SVN log for %s between revisions %s and %s" %
                      (svn_url, rev_start, rev_end))

    return found[0]
293
def get_first_svn_log_entry(svn_url, rev_start, rev_end):
    """
    Return the first log entry of 'svn_url' in the range
    rev_start:rev_end, without crossing copies (--stop-on-copy).

    NOTE: to know whether the branch creation corresponds to an SVN import
    or a copy from another branch, inspect elements of the 'changed_paths'
    entry in the returned dictionary.
    """
    return get_one_svn_log_entry(svn_url, rev_start, rev_end, True)
305
def get_last_svn_log_entry(svn_url, rev_start, rev_end):
    """
    Return the last log entry of 'svn_url' between rev_start and rev_end.

    The log is requested for the reversed range rev_end:rev_start (with
    --stop-on-copy) and the first entry svn reports is returned.
    """
    return get_one_svn_log_entry(svn_url, rev_end, rev_start, True)
313
314
# Tuning knobs for iter_svn_log_entries(): the request duration (seconds)
# used to grow or shrink the chunk size, and the smallest chunk size.
log_duration_threshold = 10.0
log_min_chunk_length = 10
317
def iter_svn_log_entries(svn_url, first_rev, last_rev):
    """
    Yield the SVN log entries of 'svn_url' from first_rev up to last_rev.

    The log is fetched in chunks so that one huge request is never sent to
    the server.  The chunk size doubles after a fast request and halves
    (down to log_min_chunk_length) after a slow one.
    """
    next_rev = first_rev
    limit = log_min_chunk_length
    span_factor = 1.0
    while last_rev == "HEAD" or next_rev <= last_rev:
        started = time.time()
        upper_rev = min(last_rev, next_rev + int(limit * span_factor))
        batch = run_svn_log(svn_url, next_rev, upper_rev, limit)
        elapsed = time.time() - started
        if batch:
            for log_entry in batch:
                yield log_entry
            # Resume right after the last revision handed out.
            next_rev = log_entry['revision'] + 1
            # Adapt chunk length based on measured request duration
            if elapsed < log_duration_threshold:
                limit = int(limit * 2.0)
            elif elapsed > log_duration_threshold * 2:
                limit = max(log_min_chunk_length, int(limit / 2.0))
        elif upper_rev == last_rev:
            # Nothing left in the final window.
            break
        else:
            # Empty window: move past it and widen the next one.
            next_rev = upper_rev + 1
            span_factor *= 2.0
347
def commit_from_svn_log_entry(entry, files=None, keep_author=False):
    """
    Given an SVN log entry and an optional sequence of files, do an svn commit.

    entry       -- log entry dict as built by parse_svn_log_xml(); its
                   'date', 'message' and 'author' keys are used.
    files       -- paths to commit; when empty or None no paths are passed
                   to "svn ci".
    keep_author -- when true, commit with "--username <author>"; otherwise
                   the author is recorded as an "Author:" line in the
                   commit message.

    The original commit date is appended to the message as a "Date:" line,
    rendered in the local timezone.  Entries without an author (author is
    None) get neither "--username" nor an "Author:" line.
    """
    timestamp = int(entry['date'])
    # datetime.fromtimestamp() renders the date in the local timezone.
    svn_date = str(datetime.fromtimestamp(timestamp))
    author = entry['author']
    message = entry['message'] + "\nDate: " + svn_date
    if author and not keep_author:
        message += "\nAuthor: " + author
    options = ["ci", "--force-log", "-m", message]
    if author and keep_author:
        options += ["--username", author]
    if files:
        options += list(files)
    run_svn(options)
364
def in_svn(p):
    """
    Tell whether path 'p' is under version control: true when the first
    entry of its "svn status" output has type 'normal'.
    """
    status_entries = get_svn_status(p)
    return bool(status_entries) and status_entries[0]['type'] == 'normal'
371
def svn_add_dir(p):
    """
    Make sure directory 'p' exists and is versioned, adding any missing
    parent directories first.  Blank paths and paths already under version
    control are left untouched.
    """
    if not p.strip() or in_svn(p):
        return
    # Parents first, so that "svn add" always runs inside a versioned dir.
    svn_add_dir(os.path.dirname(p))
    if not os.path.exists(p):
        os.makedirs(p)
    run_svn(["add", p])
380
def pull_svn_rev(log_entry, svn_url, target_url, svn_path, original_wc, keep_author=False):
    """
    Replay the changes of one source revision and commit them.

    All relative paths are resolved against the current directory, which
    the caller is expected to have set to the target working copy.

    log_entry   -- log entry dict as built by parse_svn_log_xml().
    svn_url     -- URL of the source branch; used to build merge sources.
    target_url  -- not used by this function.
    svn_path    -- path of the source branch inside its repository,
                   e.g. '/branches/xmpp'.
    original_wc -- path of the source working copy; it is updated to the
                   revision being replayed and files are copied from it.
    keep_author -- forwarded to commit_from_svn_log_entry().

    Returns nothing.  ExternalCommandFailed propagates when the commit
    fails and no property conflict could be cleared for a retry.

    NOTE(review): the block nesting below was reconstructed from a copy of
    the file whose indentation had been stripped -- confirm it, in
    particular the position of the 'continue' in the copy handling and
    the extent of the property-conflict branches.
    """
    svn_rev = log_entry['revision']
    # Bring the source working copy to the revision being replayed.
    run_svn(["up", "--ignore-externals", "-r", svn_rev, original_wc])

    removed_paths = []
    merged_paths = []
    unrelated_paths = []
    commit_paths = []
    for d in log_entry['changed_paths']:
        # e.g. u'/branches/xmpp/twisted/words/test/test.py'
        p = d['path']
        if not p.startswith(svn_path + "/"):
            # Ignore changed files that are not part of this subdir
            if p != svn_path:
                unrelated_paths.append(p)
            continue
        # e.g. u'twisted/words/test/test.py'
        p = p[len(svn_path):].strip("/")
        # Record for commit
        action = d['action']
        # Compare against the individual codes; a substring test on 'MARD'
        # would also accept '' or multi-letter values such as 'MA'.
        if action not in ('M', 'A', 'R', 'D'):
            display_error("In SVN rev. %d: action '%s' not supported. "
                          "Please report a bug!" % (svn_rev, action))

        if len(commit_paths) < 100:
            commit_paths.append(p)
        # Detect special cases
        old_p = d['copyfrom_path']
        if old_p and old_p.startswith(svn_path + "/"):
            old_p = old_p[len(svn_path):].strip("/")
            # Both paths can be identical if copied from an old rev.
            # We treat it like a normal change.
            if old_p != p:
                if not in_svn(p):
                    svn_add_dir(os.path.dirname(p))
                    run_svn(["up", old_p])
                    run_svn(["copy", old_p, p])
                    if os.path.isfile(p):
                        shutil.copy(original_wc + os.sep + p, p)
                    if action == 'R':
                        removed_paths.append(old_p)
                        if len(commit_paths) < 100:
                            commit_paths.append(old_p)
                    continue
        if action == 'A':
            if os.path.isdir(original_wc + os.sep + p):
                svn_add_dir(p)
            else:
                p_path = os.path.dirname(p).strip() or '.'
                svn_add_dir(p_path)
                shutil.copy(original_wc + os.sep + p, p)
                run_svn(["add", p])
        elif action == 'D':
            removed_paths.append(p)
        else: # action == 'M'
            merged_paths.append(p)

    for r in removed_paths:
        run_svn(["up", r])
        run_svn(["remove", "--force", r])

    for m in merged_paths:
        run_svn(["up", m])
        m_url = svn_url + "/" + m
        out = run_svn(["merge", "-c", str(svn_rev), "--non-recursive",
                       "--non-interactive", "--accept=theirs-full",
                       m_url + "@" + str(svn_rev), m])
        # if conflicts, use the copy from original_wc
        if out and out.split()[0] == 'C':
            print("\n### Conflicts ignored: %s, in revision: %s\n"
                  % (m, svn_rev))
            run_svn(["revert", "--recursive", m])
            if os.path.isfile(m):
                shutil.copy(original_wc + os.sep + m, m)

    if unrelated_paths:
        print("Unrelated paths: ")
        print("* %s" % (unrelated_paths,))

    # The path list is capped at 100 entries above; once the cap has been
    # reached, commit without an explicit path list instead.
    if len(commit_paths) > 99:
        commit_paths = []

    try:
        commit_from_svn_log_entry(log_entry, commit_paths,
                                  keep_author=keep_author)
    except ExternalCommandFailed:
        # try to ignore the Properties conflicts on files and dirs
        # use the copy from original_wc
        has_Conflict = False
        for d in log_entry['changed_paths']:
            p = d['path']
            # Paths outside the branch were never replayed; stripping the
            # prefix from them would produce a meaningless relative path.
            if not p.startswith(svn_path + "/"):
                continue
            p = p[len(svn_path):].strip("/")
            if os.path.isfile(p):
                if os.path.isfile(p + ".prej"):
                    has_Conflict = True
                    shutil.copy(original_wc + os.sep + p, p)
                    # Move the reject file next to original_wc under a
                    # flattened name that includes the revision.
                    p2 = os.sep + p.replace('_', '__').replace('/', '_') \
                         + ".prej-" + str(svn_rev)
                    shutil.move(p + ".prej", os.path.dirname(original_wc) + p2)
                    w = "\n### Properties conflicts ignored:"
                    print("%s %s, in revision: %s\n" % (w, p, svn_rev))
            elif os.path.isdir(p):
                if os.path.isfile(p + os.sep + "dir_conflicts.prej"):
                    has_Conflict = True
                    p2 = os.sep + p.replace('_', '__').replace('/', '_') \
                         + "_dir__conflicts.prej-" + str(svn_rev)
                    shutil.move(p + os.sep + "dir_conflicts.prej",
                                os.path.dirname(original_wc) + p2)
                    w = "\n### Properties conflicts ignored:"
                    print("%s %s, in revision: %s\n" % (w, p, svn_rev))
                    # Re-apply the directory properties from the source
                    # working copy.
                    out = run_svn(["propget", "svn:ignore",
                                   original_wc + os.sep + p])
                    if out:
                        run_svn(["propset", "svn:ignore", out.strip(), p])
                    out = run_svn(["propget", "svn:externals",
                                   original_wc + os.sep + p])
                    if out:
                        run_svn(["propset", "svn:externals", out.strip(), p])
        # try again
        if has_Conflict:
            commit_from_svn_log_entry(log_entry, commit_paths,
                                      keep_author=keep_author)
        else:
            raise ExternalCommandFailed
513
514
def main():
    """
    Command-line entry point: replicate the history of a source SVN URL
    into a target SVN URL.

    Unless continuing from a previous run (-c with an existing '_dup_wc'
    directory), the starting revision of the source is checked out into
    '_original_wc', imported into the target, and the target is checked
    out into '_dup_wc'.  Every later source revision is then replayed with
    pull_svn_rev().  On interruption or failure the target working copy is
    cleaned up and reverted.

    NOTE(review): the block nesting below was reconstructed from a copy of
    the file whose indentation had been stripped -- confirm it.
    """
    usage = "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
    parser = OptionParser(usage)
    parser.add_option("-a", "--keep-author", action="store_true",
                      dest="keep_author", help="Keep revision Author or not")
    parser.add_option("-c", "--continue-from-break", action="store_true",
                      dest="cont_from_break",
                      help="Continue from previous break")
    parser.add_option("-r", "--svn-rev", type="int", dest="svn_rev",
                      help="SVN revision to checkout from")
    (options, args) = parser.parse_args()
    if len(args) != 2:
        display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
                      False)

    source_url = args.pop(0).rstrip("/")
    target_url = args.pop(0).rstrip("/")
    keep_author = bool(options.keep_author)

    # Find the greatest_rev
    # don't use 'svn info' to get greatest_rev, it doesn't work sometimes
    svn_log = get_one_svn_log_entry(source_url, "HEAD", "HEAD")
    greatest_rev = svn_log['revision']

    original_wc = "_original_wc"
    dup_wc = "_dup_wc"

    ## old working copy does not exist, disable continue mode
    if not os.path.exists(dup_wc):
        options.cont_from_break = False

    if not options.cont_from_break:
        # Warn if Target SVN URL existed
        cmd = find_program("svn")
        pipe = Popen([cmd] + ["list"] + [target_url], executable=cmd,
                     stdout=PIPE, stderr=PIPE)
        out, err = pipe.communicate()
        if pipe.returncode == 0:
            print("Target SVN URL: %s existed!" % target_url)
            if out:
                print(out)
            print("Press 'Enter' to Continue, 'Ctrl + C' to Cancel...")
            print("(Timeout in 5 seconds)")
            # Wait for input on stdin, for at most 5 seconds.
            select.select([sys.stdin], [], [], 5)

        # Get log entry for the SVN revision we will check out
        if options.svn_rev:
            # If specify a rev, get log entry just before or at rev
            svn_start_log = get_last_svn_log_entry(source_url, 1,
                                                   options.svn_rev)
        else:
            # Otherwise, get log entry of branch creation
            svn_start_log = get_first_svn_log_entry(source_url, 1,
                                                    greatest_rev)

        # This is the revision we will checkout from
        svn_rev = svn_start_log['revision']

        # Check out first revision (changeset) from Source SVN URL
        if os.path.exists(original_wc):
            shutil.rmtree(original_wc)
        svn_checkout(source_url, original_wc, svn_rev)

        # Import first revision (changeset) into Target SVN URL
        timestamp = int(svn_start_log['date'])
        svn_date = str(datetime.fromtimestamp(timestamp))
        # The author may be None (parse_svn_log_xml() sets it so for
        # revisions without one); leave it out rather than fail.
        start_author = svn_start_log['author']
        import_message = svn_start_log['message'] + "\nDate: " + svn_date
        if start_author and not keep_author:
            import_message += "\nAuthor: " + start_author
        import_args = ["import", original_wc, target_url, "-m", import_message]
        if start_author and keep_author:
            import_args += ["--username", start_author]
        run_svn(import_args)

        # Check out a working copy
        if os.path.exists(dup_wc):
            shutil.rmtree(dup_wc)
        svn_checkout(target_url, dup_wc)

    original_wc = os.path.abspath(original_wc)
    dup_wc = os.path.abspath(dup_wc)
    # pull_svn_rev() works with paths relative to the target working copy.
    os.chdir(dup_wc)

    # Get SVN info
    svn_info = get_svn_info(original_wc)
    # e.g. u'svn://svn.twistedmatrix.com/svn/Twisted'
    repos_url = svn_info['repos_url']
    # e.g. u'svn://svn.twistedmatrix.com/svn/Twisted/branches/xmpp'
    svn_url = svn_info['url']
    assert svn_url.startswith(repos_url)
    # e.g. u'/branches/xmpp'
    svn_path = svn_url[len(repos_url):]

    if options.cont_from_break:
        # Resume one revision before the one the source working copy is at.
        svn_rev = max(1, svn_info['revision'] - 1)

    # Load SVN log starting from svn_rev + 1
    it_log_entries = iter_svn_log_entries(svn_url, svn_rev + 1, greatest_rev)

    try:
        for log_entry in it_log_entries:
            pull_svn_rev(log_entry, svn_url, target_url, svn_path,
                         original_wc, keep_author)

    except KeyboardInterrupt:
        print("\nStopped by user.")
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
    except Exception:
        # Top-level boundary: report the failure and leave the working
        # copy in a clean state.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
    finally:
        run_svn(["up"])
        print("\nFinished!")
639
640
# Run the replication only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
643