]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn.py
Initial commit: http://svn2svn.googlecode.com/svn/trunk/ r14
[svn2svn.git] / svn2svn.py
1 #!/usr/bin/env python
2 """
3 svn2svn.py
4
5 Replicate changesets from one SVN repository to another,
6 includes diffs, comments, and Dates of each revision.
7 It's also possible to retain the Author info if the Target SVN URL
8 is in a local filesystem (ie, running svn2svn.py on Target SVN server),
9 or if Target SVN URL is managed through ssh tunnel.
10 In the latter case, please run 'ssh-add' (adds RSA or DSA identities to
11 the authentication agent) before invoking svn2svn.py.
12
13 For example (in Unix environment):
14 $ exec /usr/bin/ssh-agent $SHELL
15 $ /usr/bin/ssh-add
16 Enter passphrase for /home/user/.ssh/id_dsa:
17 Identity added: /home/user/.ssh/id_dsa (/home/user/.ssh/id_dsa)
18 $ python ./svn2svn.py -a SOURCE TARGET
19
20 Written and used on Ubuntu 7.04 (Feisty Fawn).
21 Provided as-is and absolutely no warranty - aka Don't bet your life on it.
22
23 This tool re-used some modules from svnclient.py on project hgsvn
24 (a tool can create Mercurial repository from SVN repository):
25 http://cheeseshop.python.org/pypi/hgsvn
26
27 License: GPLv2, the same as hgsvn.
28
29 version 0.1.1; Jul 31, 2007; simford dot dong at gmail dot com
30 """
31
32 import os
33 import sys
34 import time
35 import locale
36 import shutil
37 import select
38 import calendar
39 import traceback
40
41 from optparse import OptionParser
42 from subprocess import Popen, PIPE
43 from datetime import datetime
44
45 try:
46 from xml.etree import cElementTree as ET
47 except ImportError:
48 try:
49 from xml.etree import ElementTree as ET
50 except ImportError:
51 try:
52 import cElementTree as ET
53 except ImportError:
54 from elementtree import ElementTree as ET
55
# Base argument lists for the svn subcommands used in this file. The
# wrapper functions below append their own options and targets to these
# lists before handing the result to run_svn().
svn_log_args = ['log', '--xml', '-v']
svn_info_args = ['info', '--xml']
svn_checkout_args = ['checkout', '-q']
svn_status_args = ['status', '--xml', '-v', '--ignore-externals']
60
# define exception class
class ExternalCommandFailed(RuntimeError):
    """
    An external command failed.

    Raised by display_error() (unless it is asked to exit instead) and
    caught in pull_svn_rev() to retry a commit after property conflicts.
    """
66
class ParameterError(RuntimeError):
    """
    Presumably signals invalid parameters (inferred from the name).

    NOTE(review): nothing in the visible part of this file raises or
    catches this exception -- confirm whether it is still needed.
    """
71
def display_error(message, raise_exception = True):
    """
    Print an error message, then abort the current operation.

    message         -- text printed after the "Error:" prefix
    raise_exception -- when true (the default) raise ExternalCommandFailed
                       carrying the message, so callers can catch it;
                       otherwise terminate the process with exit status 1.
    """
    # Single-argument print(...) produces the same output whether it is
    # parsed as a Python 2 print statement or a Python 3 function call.
    print("Error: %s" % (message,))
    print("")
    if raise_exception:
        # Attach the message so tracebacks and handlers can see what failed.
        raise ExternalCommandFailed(message)
    else:
        sys.exit(1)
82
# Windows compatibility code by Bill Baxter
if os.name == "nt":
    def find_program(name):
        """
        Resolve a program name to a full file name for Popen.

        Every directory listed in %PATH% is probed in order. When 'name'
        has no extension, '.bat' and then '.exe' are tried in each
        directory. Returns the first existing path, or None when nothing
        matches.
        (Adapted from ctypes.find_library)
        """
        # See MSDN for the REAL search order.
        stem, suffix = os.path.splitext(name)
        candidates = [suffix] if suffix else ['.bat', '.exe']
        for folder in os.environ['PATH'].split(os.pathsep):
            for candidate_suffix in candidates:
                candidate = os.path.join(folder, stem + candidate_suffix)
                if os.path.exists(candidate):
                    return candidate
        return None
else:
    def find_program(name):
        """
        Resolve a program name for Popen.

        Outside Windows the name is handed back unchanged.
        """
        return name
111
def shell_quote(s):
    """
    Quote a single argument for display in a shell-like command line.

    The result is only used to build the human-readable command string
    printed by run_svn(); the real process is started from an argument
    list, so nothing here is interpreted by a shell.

    On POSIX the argument is wrapped in single quotes. Backslashes are
    literal inside single quotes, so they are left untouched, and an
    embedded single quote is written as '"'"' (close quote, quoted
    quote, reopen quote). On Windows the argument is wrapped in double
    quotes and embedded double quotes are backslash-escaped.
    """
    if os.name == "nt":
        return '"' + s.replace('"', '\\"') + '"'
    return "'" + s.replace("'", "'\"'\"'") + "'"
118
# Fallback used by run_svn() to encode unicode arguments when the caller
# passes an empty/None encoding.
locale_encoding = locale.getpreferredencoding()
120
def run_svn(args, fail_if_stderr=False, encoding="utf-8"):
    """
    Run the svn client with the given arguments and return its stdout.

    args           -- sequence of arguments; unicode values are encoded
                      with 'encoding' (or the locale encoding when that
                      is empty), other non-string values go through str()
    fail_if_stderr -- also treat any non-blank stderr output as a failure

    The command line is echoed before it runs. On a non-zero exit status
    (or stderr output, if requested) display_error() is called, which
    raises ExternalCommandFailed.
    """
    target_encoding = encoding or locale_encoding
    argv = [find_program("svn")]
    for arg in args:
        if isinstance(arg, unicode):
            arg = arg.encode(target_encoding)
        elif not isinstance(arg, str):
            arg = str(arg)
        argv.append(arg)

    cmd_string = str(" ".join([shell_quote(part) for part in argv]))
    print("* %s" % cmd_string)
    proc = Popen(argv, executable=argv[0], stdout=PIPE, stderr=PIPE)
    out, err = proc.communicate()

    failed = proc.returncode != 0
    if not failed and fail_if_stderr:
        failed = bool(err.strip())
    if failed:
        display_error("External program failed (return code %d): %s\n%s"
                      % (proc.returncode, cmd_string, err))
    return out
143
def svn_date_to_timestamp(svn_date):
    """
    Convert an SVN date string (as found in the XML output) into a
    timestamp in seconds since the epoch.

    Everything from the first '.' onwards (fractional seconds and the
    timezone marker) is dropped, and the remainder is interpreted as UTC.
    """
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    whole_seconds = svn_date.partition('.')[0]
    parsed = time.strptime(whole_seconds, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(parsed)
155
def parse_svn_info_xml(xml_string):
    """
    Parse the XML output of an "svn info" command.

    Returns a dict with the keys 'url', 'revision', 'repos_url',
    'last_changed_rev' and 'kind', taken from the first <entry> element.
    Returns an empty dict when the document contains no <entry>.
    """
    d = {}
    tree = ET.fromstring(xml_string)
    entry = tree.find('.//entry')
    # Compare against None explicitly: the truth value of an Element
    # reflects its number of children, not whether find() matched.
    if entry is not None:
        d['url'] = entry.find('url').text
        d['revision'] = int(entry.get('revision'))
        d['repos_url'] = tree.find('.//repository/root').text
        d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
        d['kind'] = entry.get('kind')
    return d
171
def parse_svn_log_xml(xml_string):
    """
    Parse the XML output of an "svn log" command.

    Returns a list with one dict per <logentry>, holding the keys
    'revision' (int), 'author' (text or None), 'date' (timestamp),
    'message' (stripped, with line endings normalised to '\\n') and
    'changed_paths' (list of dicts with 'path', 'action',
    'copyfrom_path' and 'copyfrom_revision').
    """
    entries = []
    for node in ET.fromstring(xml_string).findall('logentry'):
        revision = int(node.get('revision'))

        # Some revisions don't have authors, most notably
        # the first revision in a repository.
        author_node = node.find('author')
        if author_node is not None and author_node.text:
            author = author_node.text
        else:
            author = None

        date = svn_date_to_timestamp(node.find('date').text)

        # Some revisions may have empty commit message
        msg_node = node.find('msg')
        message = ""
        if msg_node is not None and msg_node.text is not None:
            message = msg_node.text.strip()
        # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
        for line_ending in ('\r\n', '\n\r', '\r'):
            message = message.replace(line_ending, '\n')

        changed_paths = []
        for path_node in node.findall('.//path'):
            copyfrom_rev = path_node.get('copyfrom-rev')
            if copyfrom_rev:
                copyfrom_rev = int(copyfrom_rev)
            changed_paths.append({
                'path': path_node.text,
                'action': path_node.get('action'),
                'copyfrom_path': path_node.get('copyfrom-path'),
                'copyfrom_revision': copyfrom_rev,
            })

        entries.append({
            'revision': revision,
            'author': author,
            'date': date,
            'message': message,
            'changed_paths': changed_paths,
        })
    return entries
207
def parse_svn_status_xml(xml_string, base_dir=None):
    """
    Parse the XML output of an "svn status" command.

    Returns a list with one dict per <entry>, holding 'path' and 'type'.
    When base_dir is given, every path must start with it; the prefix and
    any leading path separators are removed. 'type' is 'external' when
    the wc-status item says so, 'normal' when the wc-status carries a
    revision, and 'unversioned' otherwise.
    """
    root = ET.fromstring(xml_string)
    results = []
    for node in root.findall('.//entry'):
        rel_path = node.get('path')
        if base_dir is not None:
            assert rel_path.startswith(base_dir)
            rel_path = rel_path[len(base_dir):].lstrip('/\\')

        status = node.find('wc-status')
        if status.get('item') == 'external':
            kind = 'external'
        elif status.get('revision') is not None:
            kind = 'normal'
        else:
            kind = 'unversioned'

        results.append({'path': rel_path, 'type': kind})
    return results
231
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Run "svn info" on a URL or working copy and return the parsed result.

    When rev_number is given it is appended to the target as '@<rev>'.
    Any stderr output from svn counts as a failure. The return value is
    the dict built by parse_svn_info_xml().
    """
    target = svn_url_or_wc
    if rev_number is not None:
        target = "%s@%s" % (svn_url_or_wc, rev_number)
    info_xml = run_svn(svn_info_args + [target], fail_if_stderr=True)
    return parse_svn_info_xml(info_xml)
245
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Check out svn_url into checkout_dir, optionally at a given revision.

    Returns whatever run_svn() returns for the checkout command.
    """
    revision_opts = ['-r', rev_number] if rev_number is not None else []
    return run_svn(svn_checkout_args + revision_opts + [svn_url, checkout_dir])
255
def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False):
    """
    Fetch at most 'limit' log entries for the revision range
    rev_start:rev_end and return them as parsed by parse_svn_log_xml().

    stop_on_copy adds '--stop-on-copy' to the svn command line.
    """
    args = list(svn_log_args)
    if stop_on_copy:
        args.append('--stop-on-copy')
    args.extend(['-r', '%s:%s' % (rev_start, rev_end),
                 '--limit', str(limit),
                 svn_url_or_wc])
    return parse_svn_log_xml(run_svn(args))
268
def get_svn_status(svn_wc):
    """
    Run "svn status" on a working copy and return the parsed entries,
    with paths made relative to the working copy.
    """
    # The absolute path is used both as the svn target and as the prefix
    # stripped from the reported paths, so the two always agree.
    wc_abs = os.path.abspath(svn_wc)
    status_xml = run_svn(svn_status_args + [wc_abs])
    return parse_svn_status_xml(status_xml, wc_abs)
278
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False):
    """
    Return the first log entry svn reports for the range
    rev_start:rev_end.

    Reports an error through display_error() when the range yields no
    entries.
    """
    found = run_svn_log(svn_url, rev_start, rev_end, limit=1,
                        stop_on_copy=stop_on_copy)
    if len(found) == 0:
        display_error("No SVN log for %s between revisions %s and %s" %
                      (svn_url, rev_start, rev_end))

    return found[0]
289
def get_first_svn_log_entry(svn_url, rev_start, rev_end):
    """
    Get the first log entry in the revision range rev_start:rev_end of an
    SVN branch, without following copies (stop_on_copy=True).

    main() calls this with rev_start=1 to obtain the log entry
    corresponding to the branch creation.

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True)
301
def get_last_svn_log_entry(svn_url, rev_start, rev_end):
    """
    Get the last log entry before/at rev_end in an SVN branch, without
    following copies (stop_on_copy=True).

    The range is handed to svn reversed (rev_end:rev_start), so the single
    entry requested is the one closest to rev_end.
    """
    # Note the swapped order of rev_end and rev_start.
    return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True)
309
310
# Tuning knobs for iter_svn_log_entries():
# - log_duration_threshold: request duration (as measured with time.time())
#   below which the chunk length is doubled; above twice this value the
#   chunk length is halved.
# - log_min_chunk_length: initial and smallest number of log entries
#   requested per chunk.
log_duration_threshold = 10.0
log_min_chunk_length = 10
313
def iter_svn_log_entries(svn_url, first_rev, last_rev):
    """
    Iterate over SVN log entries between first_rev and last_rev.

    This function features chunked log fetching so that it isn't too nasty
    to the SVN server if many entries are requested. The chunk length
    grows while requests are fast and shrinks (never below
    log_min_chunk_length) when they become slow.

    first_rev -- first revision number to look at
    last_rev  -- last revision number to look at, or the string "HEAD"

    NOTE(review): with last_rev == "HEAD" there is no upper bound to
    compare against, so the loop only ends when run_svn_log() raises for
    a range past the youngest revision -- confirm this is acceptable, or
    pass a concrete revision number as main() does.
    """
    cur_rev = first_rev
    chunk_length = log_min_chunk_length
    chunk_interval_factor = 1.0
    # Handle "HEAD" explicitly rather than relying on how min() orders an
    # int against a str (arbitrary on Python 2, an error on Python 3).
    open_ended = (last_rev == "HEAD")
    while open_ended or cur_rev <= last_rev:
        start_t = time.time()
        stop_rev = cur_rev + int(chunk_length * chunk_interval_factor)
        if not open_ended:
            stop_rev = min(last_rev, stop_rev)
        entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length)
        duration = time.time() - start_t
        if not entries:
            if stop_rev == last_rev:
                break
            # Nothing in this window: skip past it and widen the next one.
            cur_rev = stop_rev + 1
            chunk_interval_factor *= 2.0
            continue
        for e in entries:
            yield e
            cur_rev = e['revision'] + 1
        # Adapt chunk length based on measured request duration
        if duration < log_duration_threshold:
            chunk_length = int(chunk_length * 2.0)
        elif duration > log_duration_threshold * 2:
            chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
343
def commit_from_svn_log_entry(entry, files=None, keep_author=False):
    """
    Given an SVN log entry and an optional sequence of files, do an svn commit.

    entry       -- dict as produced by parse_svn_log_xml(); its 'message',
                   'date' and 'author' values are used
    files       -- paths to commit; when empty or None, svn is run
                   without explicit targets
    keep_author -- commit with '--username <author>' instead of recording
                   the author in the commit message

    The original commit date is always appended to the message. A log
    entry may have no author (parse_svn_log_xml() stores None): in that
    case '--username' is omitted, or the message records "(no author)".
    """
    # This will use the local timezone for displaying commit times
    timestamp = int(entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    # Uncomment this one one if you prefer UTC commit times
    #svn_date = "%d 0" % timestamp
    author = entry['author']
    message = entry['message'] + "\nDate: " + svn_date
    if keep_author:
        options = ["ci", "--force-log", "-m", message]
        if author is not None:
            options += ["--username", author]
    else:
        options = ["ci", "--force-log", "-m",
                   message + "\nAuthor: " + (author or "(no author)")]
    if files:
        options += list(files)
    run_svn(options)
360
def svn_add_dir(p):
    """
    Make sure directory 'p' exists and is added to the working copy.

    Nothing happens for a blank path or for a directory that already
    contains a '.svn' entry. Otherwise the parent directory is handled
    first (recursively), the directory is created if missing, and
    "svn add" is run on it.

    NOTE(review): the '.svn' test presumably targets working-copy formats
    that keep a '.svn' directory in every versioned directory -- confirm
    against the svn client version in use.
    """
    # set p = "." when p = ""
    #p = p.strip() or "."
    if not p.strip():
        return
    if os.path.exists(p + os.sep + ".svn"):
        return
    svn_add_dir(os.path.dirname(p))
    if not os.path.exists(p):
        os.makedirs(p)
    run_svn(["add", p])
369
def pull_svn_rev(log_entry, svn_url, target_url, svn_path, original_wc, keep_author=False):
    """
    Replay the changes of one source revision and commit them.

    All relative paths are resolved against the current directory, which
    main() sets to the target working copy before calling this function.

    log_entry   -- dict as produced by parse_svn_log_xml()
    svn_url     -- URL of the source branch, used to build merge sources
    target_url  -- URL of the target; not used by this function
    svn_path    -- path of the source branch inside its repository,
                   e.g. '/branches/xmpp'
    original_wc -- working copy of the source; it is updated here to the
                   revision being replayed and serves as the reference
                   for file contents
    keep_author -- forwarded to commit_from_svn_log_entry()

    Returns None. Raises ExternalCommandFailed when an svn command fails
    and the failure cannot be attributed to property conflicts.
    No rollback is attempted here; main() reverts the working copy when
    an exception escapes.
    """
    svn_rev = log_entry['revision']
    run_svn(["up", "--ignore-externals", "-r", svn_rev, original_wc])

    removed_paths = []
    merged_paths = []
    unrelated_paths = []
    commit_paths = []
    for d in log_entry['changed_paths']:
        # e.g. u'/branches/xmpp/twisted/words/test/test.py'
        p = d['path']
        if not p.startswith(svn_path + "/"):
            # Ignore changed files that are not part of this subdir
            if p != svn_path:
                unrelated_paths.append(p)
            continue
        # e.g. u'twisted/words/test/test.py'
        p = p[len(svn_path):].strip("/")
        # Record for commit
        action = d['action']
        # Exact membership test: a substring test against 'MARD' would
        # also accept '' or multi-letter values such as 'MA'.
        if action not in ('M', 'A', 'R', 'D'):
            display_error("In SVN rev. %d: action '%s' not supported. "
                          "Please report a bug!" % (svn_rev, action))

        if len(commit_paths) < 100:
            commit_paths.append(p)
        # Detect special cases
        old_p = d['copyfrom_path']
        if old_p and old_p.startswith(svn_path + "/"):
            old_p = old_p[len(svn_path):].strip("/")
            # Both paths can be identical if copied from an old rev.
            # We treat like it a normal change.
            if old_p != p:
                # NOTE(review): nesting of this branch was reconstructed
                # from the statement order; verify against the upstream
                # file.
                if not os.path.exists(p + os.sep + '.svn'):
                    svn_add_dir(os.path.dirname(p))
                    run_svn(["up", old_p])
                    run_svn(["copy", old_p, p])
                    if os.path.isfile(p):
                        # The copy may also carry content changes; take
                        # the file as it is in the source working copy.
                        shutil.copy(original_wc + os.sep + p, p)
                if action == 'R':
                    removed_paths.append(old_p)
                    if len(commit_paths) < 100:
                        commit_paths.append(old_p)
                continue
        if action == 'A':
            if os.path.isdir(original_wc + os.sep + p):
                svn_add_dir(p)
            else:
                p_path = os.path.dirname(p).strip() or '.'
                svn_add_dir(p_path)
                shutil.copy(original_wc + os.sep + p, p)
                run_svn(["add", p])
        elif action == 'D':
            removed_paths.append(p)
        else: # action == 'M'
            merged_paths.append(p)

    for r in removed_paths:
        run_svn(["up", r])
        run_svn(["remove", "--force", r])

    for m in merged_paths:
        run_svn(["up", m])
        m_url = svn_url + "/" + m
        out = run_svn(["merge", "-c", str(svn_rev), "--non-recursive",
                       m_url + "@" + str(svn_rev), m])
        # if conflicts, use the copy from original_wc
        if out and out.split()[0] == 'C':
            print("\n### Conflicts ignored: %s, in revision: %s\n"
                  % (m, svn_rev))
            run_svn(["revert", "--recursive", m])
            if os.path.isfile(m):
                shutil.copy(original_wc + os.sep + m, m)

    if unrelated_paths:
        print("Unrelated paths: ")
        print("* %s" % (unrelated_paths,))

    ## too many files
    # Path collection stops at 100 entries; in that case commit without
    # explicit targets instead of with a truncated list.
    if len(commit_paths) > 99:
        commit_paths = []

    try:
        commit_from_svn_log_entry(log_entry, commit_paths,
                                  keep_author=keep_author)
    except ExternalCommandFailed:
        # try to ignore the Properties conflicts on files and dirs
        # use the copy from original_wc
        has_Conflict = False
        for d in log_entry['changed_paths']:
            p = d['path']
            if not p.startswith(svn_path + "/"):
                # Paths outside this branch were never replayed; slicing
                # them would yield a meaningless relative path.
                continue
            p = p[len(svn_path):].strip("/")
            if os.path.isfile(p):
                if os.path.isfile(p + ".prej"):
                    has_Conflict = True
                    shutil.copy(original_wc + os.sep + p, p)
                    # Keep the reject file next to the source working
                    # copy, under a flattened, revision-tagged name.
                    p2 = os.sep + p.replace('_', '__').replace('/', '_') \
                         + ".prej-" + str(svn_rev)
                    shutil.move(p + ".prej", os.path.dirname(original_wc) + p2)
                    print("%s %s, in revision: %s\n"
                          % ("\n### Properties conflicts ignored:", p, svn_rev))
            elif os.path.isdir(p):
                if os.path.isfile(p + os.sep + "dir_conflicts.prej"):
                    has_Conflict = True
                    p2 = os.sep + p.replace('_', '__').replace('/', '_') \
                         + "_dir__conflicts.prej-" + str(svn_rev)
                    shutil.move(p + os.sep + "dir_conflicts.prej",
                                os.path.dirname(original_wc) + p2)
                    print("%s %s, in revision: %s\n"
                          % ("\n### Properties conflicts ignored:", p, svn_rev))
                    # NOTE(review): the property copy below is assumed to
                    # belong to the conflict branch (nesting was
                    # reconstructed); verify against the upstream file.
                    out = run_svn(["propget", "svn:ignore",
                                   original_wc + os.sep + p])
                    if out:
                        run_svn(["propset", "svn:ignore", out.strip(), p])
                    # The property is named "svn:externals".
                    out = run_svn(["propget", "svn:externals",
                                   original_wc + os.sep + p])
                    if out:
                        run_svn(["propset", "svn:externals", out.strip(), p])
        # try again
        if has_Conflict:
            commit_from_svn_log_entry(log_entry, commit_paths,
                                      keep_author=keep_author)
        else:
            raise ExternalCommandFailed(
                "commit of revision %s failed and no property conflict "
                "was found" % svn_rev)
501
502
def main():
    """
    Command-line entry point: replicate a source SVN URL into a target.

    Unless continuing from a previous run (-c with an existing '_dup_wc'),
    the starting revision of the source is checked out into
    '_original_wc', imported into the target URL, and the target is
    checked out into '_dup_wc'. Every later source revision up to the
    youngest one is then replayed with pull_svn_rev().

    On interruption or failure the target working copy is cleaned up and
    reverted; it is always updated before returning.
    """
    usage = "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
    parser = OptionParser(usage)
    parser.add_option("-a", "--keep-author", action="store_true",
                      dest="keep_author", help="Keep revision Author or not")
    parser.add_option("-c", "--continue-from-break", action="store_true",
                      dest="cont_from_break",
                      help="Continue from previous break")
    parser.add_option("-r", "--svn-rev", type="int", dest="svn_rev",
                      help="SVN revision to checkout from")
    (options, args) = parser.parse_args()
    if len(args) != 2:
        display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
                      False)

    source_url = args.pop(0).rstrip("/")
    target_url = args.pop(0).rstrip("/")
    keep_author = bool(options.keep_author)

    # Find the greatest_rev
    # don't use 'svn info' to get greatest_rev, it doesn't work sometimes
    svn_log = get_one_svn_log_entry(source_url, "HEAD", "HEAD")
    greatest_rev = svn_log['revision']

    original_wc = "_original_wc"
    dup_wc = "_dup_wc"

    ## old working copy does not exist, disable continue mode
    if not os.path.exists(dup_wc):
        options.cont_from_break = False

    if not options.cont_from_break:
        # Warn if Target SVN URL existed
        cmd = find_program("svn")
        pipe = Popen([cmd] + ["list"] + [target_url], executable=cmd,
                     stdout=PIPE, stderr=PIPE)
        out, err = pipe.communicate()
        if pipe.returncode == 0:
            print("Target SVN URL: %s existed!" % target_url)
            if out:
                print(out)
            print("Press 'Enter' to Continue, 'Ctrl + C' to Cancel...")
            print("(Timeout in 5 seconds)")
            if os.name == "nt":
                # select() only accepts sockets on Windows, so just give
                # the user the same five seconds to press Ctrl + C.
                time.sleep(5)
            else:
                select.select([sys.stdin], [], [], 5)

        # Get log entry for the SVN revision we will check out
        if options.svn_rev:
            # If specify a rev, get log entry just before or at rev
            svn_start_log = get_last_svn_log_entry(source_url, 1,
                                                   options.svn_rev)
        else:
            # Otherwise, get log entry of branch creation
            svn_start_log = get_first_svn_log_entry(source_url, 1,
                                                    greatest_rev)

        # This is the revision we will checkout from
        svn_rev = svn_start_log['revision']

        # Check out first revision (changeset) from Source SVN URL
        if os.path.exists(original_wc):
            shutil.rmtree(original_wc)
        svn_checkout(source_url, original_wc, svn_rev)

        # Import first revision (changeset) into Target SVN URL
        timestamp = int(svn_start_log['date'])
        svn_date = str(datetime.fromtimestamp(timestamp))
        # A log entry may have no author (None); never concatenate it or
        # pass it to --username in that case.
        author = svn_start_log['author']
        message = svn_start_log['message'] + "\nDate: " + svn_date
        import_args = ["import", original_wc, target_url, "-m"]
        if keep_author:
            import_args.append(message)
            if author is not None:
                import_args += ["--username", author]
        else:
            import_args.append(message + "\nAuthor: "
                               + (author or "(no author)"))
        run_svn(import_args)

        # Check out a working copy
        if os.path.exists(dup_wc):
            shutil.rmtree(dup_wc)
        svn_checkout(target_url, dup_wc)

    original_wc = os.path.abspath(original_wc)
    dup_wc = os.path.abspath(dup_wc)
    # pull_svn_rev() works with paths relative to the target working copy.
    os.chdir(dup_wc)

    # Get SVN info
    svn_info = get_svn_info(original_wc)
    # e.g. u'svn://svn.twistedmatrix.com/svn/Twisted'
    repos_url = svn_info['repos_url']
    # e.g. u'svn://svn.twistedmatrix.com/svn/Twisted/branches/xmpp'
    svn_url = svn_info['url']
    assert svn_url.startswith(repos_url)
    # e.g. u'/branches/xmpp'
    svn_path = svn_url[len(repos_url):]

    if options.cont_from_break:
        # Resume from the revision the source working copy is at, so that
        # revision is replayed again.
        svn_rev = svn_info['revision'] - 1
        if svn_rev < 1:
            svn_rev = 1

    # Load SVN log starting from svn_rev + 1
    it_log_entries = iter_svn_log_entries(svn_url, svn_rev + 1, greatest_rev)

    try:
        for log_entry in it_log_entries:
            pull_svn_rev(log_entry, svn_url, target_url, svn_path,
                         original_wc, keep_author)

    except KeyboardInterrupt:
        print("\nStopped by user.")
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
    except Exception:
        # Top-level boundary: report the failure and leave the working
        # copy clean. SystemExit is deliberately not swallowed.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
    finally:
        run_svn(["up"])
        print("\nFinished!")
627
628
# Run the replication only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
631