]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn.py
svn log -r HEAD:HEAD doesn't always work, but svn log should return HEAD info by...
[svn2svn.git] / svn2svn.py
1 #!/usr/bin/env python
2 """
3 svn2svn.py
4
5 Replicate changesets from one SVN repository to another,
6 includes diffs, comments, and Dates of each revision.
7 It's also possible to retain the Author info if the Target SVN URL
8 is in a local filesystem (ie, running svn2svn.py on Target SVN server),
9 or if Target SVN URL is managed through ssh tunnel.
10 In the latter case, please run 'ssh-add' (adds RSA or DSA identities to
11 the authentication agent) before invoking svn2svn.py.
12
13 For example (in Unix environment):
14 $ exec /usr/bin/ssh-agent $SHELL
15 $ /usr/bin/ssh-add
16 Enter passphrase for /home/user/.ssh/id_dsa:
17 Identity added: /home/user/.ssh/id_dsa (/home/user/.ssh/id_dsa)
18 $ python ./svn2svn.py -a SOURCE TARGET
19
20 Written and used on Ubuntu 7.04 (Feisty Fawn).
21 Provided as-is and absolutely no warranty - aka Don't bet your life on it.
22
23 This tool re-used some modules from svnclient.py on project hgsvn
24 (a tool that can create a Mercurial repository from an SVN repository):
25 http://cheeseshop.python.org/pypi/hgsvn
26
27 License: GPLv2, the same as hgsvn.
28
29 version 0.1.1; Jul 31, 2007; simford dot dong at gmail dot com
30 """
31
32 import os
33 import sys
34 import time
35 import locale
36 import shutil
37 import select
38 import calendar
39 import traceback
40
41 from optparse import OptionParser
42 from subprocess import Popen, PIPE
43 from datetime import datetime
44
45 try:
46 from xml.etree import cElementTree as ET
47 except ImportError:
48 try:
49 from xml.etree import ElementTree as ET
50 except ImportError:
51 try:
52 import cElementTree as ET
53 except ImportError:
54 from elementtree import ElementTree as ET
55
# Base argument lists for the svn subcommands used by this script.  The
# helper functions append their own options and targets to these lists
# before handing the result to run_svn().
svn_log_args = ['log', '--xml', '-v']
svn_info_args = ['info', '--xml']
svn_checkout_args = ['checkout', '-q']
svn_status_args = ['status', '--xml', '-v', '--ignore-externals']
60
61 # define exception class
class ExternalCommandFailed(RuntimeError):
    """
    An external command failed.

    Raised by display_error(), and therefore by run_svn() when the svn
    client reports a failure.  pull_svn_rev() catches it in order to retry
    a commit after property conflicts.
    """
66
class ParameterError(RuntimeError):
    """
    A parameter was invalid.

    NOTE(review): described from the class name only; nothing in the
    visible part of this file raises or catches this exception.
    """
71
def display_error(message, raise_exception = True):
    """
    Display an error message, then abort the current operation.

    The text "Error: <message>" followed by a blank line is printed to
    standard output.

    message         -- text describing the failure.
    raise_exception -- when true (the default) ExternalCommandFailed is
                       raised, carrying the message; otherwise the process
                       is terminated with sys.exit(1).

    This function never returns normally.
    """
    print("Error: %s" % (message,))
    print("")
    if raise_exception:
        # Attach the message so that tracebacks, and callers catching the
        # exception, can tell what failed and not only that something did.
        raise ExternalCommandFailed(message)
    sys.exit(1)
82
83 # Windows compatibility code by Bill Baxter
if os.name == "nt":
    def find_program(name):
        """
        Resolve a program name to a complete file name for Popen.

        Every directory listed in %PATH% is probed in order.  When 'name'
        already has an extension only that extension is tried, otherwise
        '.bat' and then '.exe' are tried.  The first existing file wins;
        None is returned when nothing matches.
        (Adapted from ctypes.find_library)
        """
        # See MSDN for the REAL search order.
        stem, suffix = os.path.splitext(name)
        if suffix:
            candidates = [suffix]
        else:
            candidates = ['.bat', '.exe']
        search_dirs = os.environ['PATH'].split(os.pathsep)
        for folder in search_dirs:
            for candidate in candidates:
                full_name = os.path.join(folder, stem + candidate)
                if os.path.exists(full_name):
                    return full_name
        return None
else:
    def find_program(name):
        """
        Resolve a program name to a file name for Popen.

        Outside Windows the name is returned unchanged; no lookup is done.
        """
        return name
111
def shell_quote(s):
    """
    Quote a single argument for display in a shell-like command line.

    On Windows the argument is wrapped in double quotes and embedded
    double quotes are backslash-escaped.  Elsewhere it is wrapped in
    single quotes; an embedded single quote is written as '"'"' (close
    the quotes, emit a double-quoted quote, reopen).  Backslashes are left
    untouched because they are literal inside POSIX single quotes;
    doubling them would change the argument if the line were pasted back
    into a shell.

    Within this file the result is only used to print the command that
    run_svn() executes; the command itself is started from an argument
    list, without a shell.
    """
    if os.name == "nt":
        return '"' + s.replace('"', '\\"') + '"'
    return "'" + s.replace("'", "'\"'\"'") + "'"
118
# Encoding that run_svn() falls back to for unicode arguments when its
# 'encoding' parameter is empty.
locale_encoding = locale.getpreferredencoding()
120
def run_svn(args, fail_if_stderr=False, encoding="utf-8"):
    """
    Run the svn client with the given arguments and return its stdout.

    args           -- sequence of arguments; unicode values are encoded,
                      other non-string values are converted with str().
    fail_if_stderr -- also treat any non-blank stderr output as a failure.
    encoding       -- encoding for unicode arguments; locale_encoding is
                      used when this is empty.

    The command line is echoed to standard output, prefixed with "*".
    On failure display_error() is called, which raises
    ExternalCommandFailed.
    """
    def _as_str(arg):
        # 'unicode' is the Python 2 text type; byte strings pass through.
        if isinstance(arg, unicode):
            return arg.encode(encoding or locale_encoding)
        if isinstance(arg, str):
            return arg
        return str(arg)

    svn = find_program("svn")
    argv = [svn] + [_as_str(arg) for arg in args]
    cmd_string = str(" ".join([shell_quote(part) for part in argv]))
    print("* %s" % (cmd_string,))
    proc = Popen(argv, executable=svn, stdout=PIPE, stderr=PIPE)
    out, err = proc.communicate()
    failed = proc.returncode != 0
    if not failed and fail_if_stderr:
        failed = bool(err.strip())
    if failed:
        display_error("External program failed (return code %d): %s\n%s"
                      % (proc.returncode, cmd_string, err))
    return out
143
def svn_date_to_timestamp(svn_date):
    """
    Parse an SVN date as read from the XML output and
    return the corresponding timestamp.

    svn_date -- string such as "2007-07-31T12:34:56.123456Z"; the
                fractional seconds are optional.

    Returns the number of seconds since the epoch, treating the date as
    UTC.  Raises ValueError when the date does not match the expected
    "%Y-%m-%dT%H:%M:%S" layout.
    """
    # Drop the fractional seconds and the trailing "Z" marker.  The
    # rstrip() covers dates that have no fractional part, where the "Z"
    # would otherwise reach strptime().  Other timezone designators are
    # not handled (always UTC, hopefully).
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    date = svn_date.split('.', 1)[0].rstrip('Z')
    time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(time_tuple)
155
def parse_svn_info_xml(xml_string):
    """
    Parse the XML output from an "svn info" command and extract
    useful information as a dict.

    The dict has the keys 'url', 'revision', 'repos_url',
    'last_changed_rev' and 'kind'.  It is empty when the output holds no
    <entry> element, or an <entry> element without children.
    """
    d = {}
    tree = ET.fromstring(xml_string)
    entry = tree.find('.//entry')
    # Spell the test out: the truth value of an Element depends on its
    # number of children and testing it directly is deprecated.
    if entry is not None and len(entry) > 0:
        d['url'] = entry.find('url').text
        d['revision'] = int(entry.get('revision'))
        d['repos_url'] = tree.find('.//repository/root').text
        d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
        d['kind'] = entry.get('kind')
    return d
171
def parse_svn_log_xml(xml_string):
    """
    Turn the XML output of "svn log" into a list of changeset dicts.

    Each dict has the keys 'revision' (int), 'author' (text or None),
    'date' (timestamp from svn_date_to_timestamp), 'message' (stripped,
    with unix line endings) and 'changed_paths' (list of dicts with the
    keys 'path', 'action', 'copyfrom_path' and 'copyfrom_revision').
    """
    changesets = []
    root = ET.fromstring(xml_string)
    for logentry in root.findall('logentry'):
        revision = int(logentry.get('revision'))

        # Some revisions don't have authors, most notably
        # the first revision in a repository.
        author_node = logentry.find('author')
        if author_node is not None and author_node.text:
            author = author_node.text
        else:
            author = None

        date = svn_date_to_timestamp(logentry.find('date').text)

        # Some revisions may have empty commit message
        msg_node = logentry.find('msg')
        if msg_node is not None and msg_node.text is not None:
            message = msg_node.text.strip() or ""
        else:
            message = ""
        # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
        for foreign_eol in ('\r\n', '\n\r', '\r'):
            message = message.replace(foreign_eol, '\n')

        changed = []
        for node in logentry.findall('.//path'):
            rev_attr = node.get('copyfrom-rev')
            if rev_attr:
                rev_attr = int(rev_attr)
            changed.append({
                'path': node.text,
                'action': node.get('action'),
                'copyfrom_path': node.get('copyfrom-path'),
                'copyfrom_revision': rev_attr,
            })

        changesets.append({
            'revision': revision,
            'author': author,
            'date': date,
            'message': message,
            'changed_paths': changed,
        })
    return changesets
207
def parse_svn_status_xml(xml_string, base_dir=None):
    """
    Turn the XML output of "svn status" into a list of dicts, one per
    status entry.

    Each dict has a 'path' and a 'type', the latter being 'external',
    'normal' (the entry has a revision) or 'unversioned'.  When base_dir
    is given every path must start with it; that prefix and any leading
    path separators are removed.
    """
    results = []
    root = ET.fromstring(xml_string)
    for node in root.findall('.//entry'):
        rel_path = node.get('path')
        if base_dir is not None:
            assert rel_path.startswith(base_dir)
            rel_path = rel_path[len(base_dir):].lstrip('/\\')
        status = node.find('wc-status')
        if status.get('item') == 'external':
            kind = 'external'
        elif status.get('revision') is None:
            kind = 'unversioned'
        else:
            kind = 'normal'
        results.append({'path': rel_path, 'type': kind})
    return results
231
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Run "svn info" on a URL or working copy and return the parsed result.

    When rev_number is given it is appended to the target as "@<rev>".
    Any output on stderr counts as a failure.  The return value is the
    dict built by parse_svn_info_xml().
    """
    target = svn_url_or_wc
    if rev_number is not None:
        target = target + "@" + str(rev_number)
    xml_string = run_svn(svn_info_args + [target], fail_if_stderr=True)
    return parse_svn_info_xml(xml_string)
245
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Check out svn_url into checkout_dir, at rev_number when one is given.

    Returns whatever run_svn() returns for the checkout command.
    """
    if rev_number is None:
        rev_args = []
    else:
        rev_args = ['-r', rev_number]
    return run_svn(svn_checkout_args + rev_args + [svn_url, checkout_dir])
255
def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False):
    """
    Fetch up to 'limit' SVN log entries between the given revisions.

    svn_url_or_wc -- URL or working copy path to query.
    rev_start,
    rev_end       -- bounds of the revision range, passed to svn as
                     "-r rev_start:rev_end".
    limit         -- maximum number of entries to request.
    stop_on_copy  -- add --stop-on-copy to the command.

    Returns the list built by parse_svn_log_xml().
    """
    args = []
    if stop_on_copy:
        args.append('--stop-on-copy')
    # "svn log -r HEAD:HEAD" doesn't always work, whereas a plain
    # "svn log" is expected to report the HEAD entry first; so the range
    # is left out for exactly that request.  A range with HEAD on only
    # one side (e.g. 5:HEAD) is kept, otherwise it would be silently
    # replaced by "latest entries".
    if not (rev_start == 'HEAD' and rev_end == 'HEAD'):
        args += ['-r', '%s:%s' % (rev_start, rev_end)]
    args += ['--limit', str(limit), svn_url_or_wc]
    xml_string = run_svn(svn_log_args + args)
    return parse_svn_log_xml(xml_string)
269
def get_svn_status(svn_wc):
    """
    Run "svn status" on a working copy and return the parsed entries.

    The result is the list built by parse_svn_status_xml(), with paths
    relative to the working copy.
    """
    # The absolute path is what gets stripped from the reported paths, so
    # canonicalize it before handing it to svn.
    wc_root = os.path.abspath(svn_wc)
    status_xml = run_svn(svn_status_args + [wc_root])
    return parse_svn_status_xml(status_xml, wc_root)
279
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False):
    """
    Return the first log entry svn reports for the requested range.

    A single entry is requested (limit 1).  When svn reports none,
    display_error() is called, which raises ExternalCommandFailed.
    """
    found = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy)
    if found:
        return found[0]
    display_error("No SVN log for %s between revisions %s and %s" %
                  (svn_url, rev_start, rev_end))
    # Only reached if display_error() ever returns; indexing the empty
    # result then fails exactly as it always has.
    return found[0]
290
def get_first_svn_log_entry(svn_url, rev_start, rev_end):
    """
    Get the first log entry after/at revision rev_start (and no later than
    rev_end) in an SVN branch.

    Both revisions must be supplied; there are no defaults.  Starting from
    the earliest revision gives the log entry corresponding to the branch
    creation.  The log is fetched with stop_on_copy=True.

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True)
302
def get_last_svn_log_entry(svn_url, rev_start, rev_end):
    """
    Get the last log entry before/at revision rev_end (and no earlier than
    rev_start) in an SVN branch.

    Both revisions must be supplied; there are no defaults.  Passing HEAD
    as rev_end gives the log entry corresponding to the latest commit in
    the branch.  The log is fetched with stop_on_copy=True.
    """
    # The range is handed over reversed (rev_end:rev_start) so that the
    # single entry requested is the one closest to rev_end.
    return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True)
310
311
# Tuning knobs for iter_svn_log_entries().
# Request duration in seconds: a faster request doubles the chunk length,
# one taking more than twice this long halves it.
log_duration_threshold = 10.0
# Initial, and smallest allowed, number of log entries requested at once.
log_min_chunk_length = 10
314
def iter_svn_log_entries(svn_url, first_rev, last_rev):
    """
    Yield the SVN log entries between first_rev and last_rev, in order.

    The log is fetched in chunks so that a large range is not too nasty
    to the SVN server.  The chunk length is adapted to the measured
    request duration, and the revision window is widened whenever a
    window turns out to contain no entries.  last_rev may be "HEAD".
    """
    next_rev = first_rev
    batch_size = log_min_chunk_length
    window_factor = 1.0
    open_ended = (last_rev == "HEAD")
    while open_ended or next_rev <= last_rev:
        started = time.time()
        window_end = next_rev + int(batch_size * window_factor)
        if open_ended:
            # No numeric upper bound is known; the window end is the limit.
            stop_rev = window_end
        else:
            stop_rev = min(last_rev, window_end)
        entries = run_svn_log(svn_url, next_rev, stop_rev, batch_size)
        elapsed = time.time() - started

        if entries:
            for entry in entries:
                yield entry
            next_rev = entries[-1]['revision'] + 1
            # Adapt chunk length based on measured request duration
            if elapsed < log_duration_threshold:
                batch_size = int(batch_size * 2.0)
            elif elapsed > log_duration_threshold * 2:
                batch_size = max(log_min_chunk_length, int(batch_size / 2.0))
            continue

        # Empty window: finished if it reached the end, otherwise move
        # past it and look at twice as many revisions next time.
        if stop_rev == last_rev:
            break
        next_rev = stop_rev + 1
        window_factor *= 2.0
344
def commit_from_svn_log_entry(entry, files=None, keep_author=False):
    """
    Given an SVN log entry and an optional sequence of files, do an svn commit.

    entry       -- changeset dict as built by parse_svn_log_xml(); the keys
                   'date', 'message' and 'author' are used.
    files       -- paths to commit; when empty or None no paths are passed
                   to "svn ci".
    keep_author -- commit with "--username <author>" instead of recording
                   the author in the log message.

    The original commit date is appended to the log message as a "Date:"
    line.  'author' may be None (some revisions have no author); then
    neither --username nor an "Author:" line is emitted.
    """
    # This will use the local timezone for displaying commit times
    timestamp = int(entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    # Uncomment this one one if you prefer UTC commit times
    #svn_date = "%d 0" % timestamp
    author = entry['author']
    message = entry['message'] + "\nDate: " + svn_date
    if author and not keep_author:
        message += "\nAuthor: " + author
    options = ["ci", "--force-log", "-m", message]
    if author and keep_author:
        options += ["--username", author]
    if files:
        options += list(files)
    run_svn(options)
361
def svn_add_dir(p):
    """
    Make sure directory p exists and schedule it for addition, after
    doing the same for its parent directories.

    Nothing is done for a blank path, or for a directory that already
    contains a ".svn" entry.
    """
    if not p.strip():
        return
    if os.path.exists(p + os.sep + ".svn"):
        return
    # Parents first, so that p is added below a versioned directory.
    svn_add_dir(os.path.dirname(p))
    if not os.path.exists(p):
        os.makedirs(p)
    run_svn(["add", p])
370
def pull_svn_rev(log_entry, svn_url, target_url, svn_path, original_wc, keep_author=False):
    """
    Replay the changes of one source log entry in the working copy that is
    the current directory, then commit them.

    log_entry   -- changeset dict as built by parse_svn_log_xml().
    svn_url     -- URL of the source branch; used to build merge sources.
    target_url  -- URL of the target repository (not used here).
    svn_path    -- path of the source branch inside its repository, e.g.
                   u'/branches/xmpp'; changed paths outside it are skipped.
    original_wc -- working copy of the source branch; it is updated to the
                   entry's revision and serves as the reference content.
    keep_author -- passed through to commit_from_svn_log_entry().

    Returns None.  Raises ExternalCommandFailed when an svn command fails
    and the failure is not a property conflict that could be worked
    around.  No rollback is done here; that is left to the caller.
    """
    svn_rev = log_entry['revision']
    run_svn(["up", "--ignore-externals", "-r", svn_rev, original_wc])

    removed_paths = []
    merged_paths = []
    unrelated_paths = []
    commit_paths = []
    for d in log_entry['changed_paths']:
        # e.g. u'/branches/xmpp/twisted/words/test/test.py'
        p = d['path']
        if not p.startswith(svn_path + "/"):
            # Ignore changed files that are not part of this subdir
            if p != svn_path:
                unrelated_paths.append(p)
            continue
        # e.g. u'twisted/words/test/test.py'
        p = p[len(svn_path):].strip("/")
        # Record for commit
        action = d['action']
        # Compare against the individual codes; a substring test on 'MARD'
        # would also accept '' or 'MA'.
        if action not in ('M', 'A', 'R', 'D'):
            display_error("In SVN rev. %d: action '%s' not supported. "
                          "Please report a bug!" % (svn_rev, action))

        # At most 100 paths are listed explicitly, see "too many files".
        if len(commit_paths) < 100:
            commit_paths.append(p)
        # Detect special cases
        old_p = d['copyfrom_path']
        if old_p and old_p.startswith(svn_path + "/"):
            old_p = old_p[len(svn_path):].strip("/")
            # Both paths can be identical if copied from an old rev.
            # We treat like it a normal change.
            if old_p != p:
                if not os.path.exists(p + os.sep + '.svn'):
                    svn_add_dir(os.path.dirname(p))
                    run_svn(["up", old_p])
                    run_svn(["copy", old_p, p])
                    if os.path.isfile(p):
                        # The copy carries the old content; take the
                        # content of this revision from the source.
                        shutil.copy(original_wc + os.sep + p, p)
                if action == 'R':
                    removed_paths.append(old_p)
                    if len(commit_paths) < 100:
                        commit_paths.append(old_p)
                continue
        if action == 'A':
            if os.path.isdir(original_wc + os.sep + p):
                svn_add_dir(p)
            else:
                p_path = os.path.dirname(p).strip() or '.'
                svn_add_dir(p_path)
                shutil.copy(original_wc + os.sep + p, p)
                run_svn(["add", p])
        elif action == 'D':
            removed_paths.append(p)
        else: # action == 'M'
            merged_paths.append(p)

    for r in removed_paths:
        run_svn(["up", r])
        run_svn(["remove", "--force", r])

    for m in merged_paths:
        run_svn(["up", m])
        m_url = svn_url + "/" + m
        out = run_svn(["merge", "-c", str(svn_rev), "--non-recursive",
                       "--non-interactive", "--accept=theirs-full",
                       m_url + "@" + str(svn_rev), m])
        # if conflicts, use the copy from original_wc
        # (the slice keeps whitespace-only output from raising IndexError)
        if out and out.split()[:1] == ['C']:
            print("\n### Conflicts ignored: %s, in revision: %s\n"
                  % (m, svn_rev))
            run_svn(["revert", "--recursive", m])
            if os.path.isfile(m):
                shutil.copy(original_wc + os.sep + m, m)

    if unrelated_paths:
        print("Unrelated paths: ")
        print("* %s" % (unrelated_paths,))

    ## too many files: commit without listing individual paths
    if len(commit_paths) > 99:
        commit_paths = []

    try:
        commit_from_svn_log_entry(log_entry, commit_paths,
                                  keep_author=keep_author)
    except ExternalCommandFailed:
        # try to ignore the Properties conflicts on files and dirs
        # use the copy from original_wc
        has_Conflict = False
        # Conflict reports are moved next to the source working copy.
        prej_dir = os.path.dirname(original_wc)
        w = "\n### Properties conflicts ignored:"
        for d in log_entry['changed_paths']:
            p = d['path']
            if not p.startswith(svn_path + "/"):
                # Not part of this branch; slicing it would produce a
                # meaningless relative path.
                continue
            p = p[len(svn_path):].strip("/")
            flat_name = os.sep + p.replace('_', '__').replace('/', '_')
            if os.path.isfile(p):
                if os.path.isfile(p + ".prej"):
                    has_Conflict = True
                    shutil.copy(original_wc + os.sep + p, p)
                    p2 = flat_name + ".prej-" + str(svn_rev)
                    shutil.move(p + ".prej", prej_dir + p2)
                    print("%s %s, in revision: %s\n" % (w, p, svn_rev))
            elif os.path.isdir(p):
                if os.path.isfile(p + os.sep + "dir_conflicts.prej"):
                    has_Conflict = True
                    p2 = flat_name + "_dir__conflicts.prej-" + str(svn_rev)
                    shutil.move(p + os.sep + "dir_conflicts.prej",
                                prej_dir + p2)
                    print("%s %s, in revision: %s\n" % (w, p, svn_rev))
                    # Re-apply the directory properties from the source.
                    # The externals property is named "svn:externals".
                    for prop in ("svn:ignore", "svn:externals"):
                        out = run_svn(["propget", prop,
                                       original_wc + os.sep + p])
                        if out:
                            run_svn(["propset", prop, out.strip(), p])
        # try again
        if has_Conflict:
            commit_from_svn_log_entry(log_entry, commit_paths,
                                      keep_author=keep_author)
        else:
            # Nothing could be worked around: let the original failure
            # propagate, with its message and traceback.
            raise
503
504
def main():
    """
    Command line entry point: replicate the changesets of a source SVN URL
    into a target SVN URL.

    Two working copies are kept in the current directory: "_original_wc"
    (the source) and "_dup_wc" (the target).  Unless continuing from a
    previous run (-c, only honoured when "_dup_wc" exists), the starting
    revision is checked out from the source and imported into the target,
    and the target is checked out afresh.  Every later log entry up to the
    newest one is then replayed with pull_svn_rev().

    On Ctrl-C or on an error the target working copy is cleaned up and
    reverted; in all cases it is updated at the end.
    """
    usage = "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
    parser = OptionParser(usage)
    parser.add_option("-a", "--keep-author", action="store_true",
                      dest="keep_author", help="Keep revision Author or not")
    parser.add_option("-c", "--continue-from-break", action="store_true",
                      dest="cont_from_break",
                      help="Continue from previous break")
    parser.add_option("-r", "--svn-rev", type="int", dest="svn_rev",
                      help="SVN revision to checkout from")
    (options, args) = parser.parse_args()
    if len(args) != 2:
        display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
                      False)

    source_url = args.pop(0).rstrip("/")
    target_url = args.pop(0).rstrip("/")
    keep_author = bool(options.keep_author)

    # Find the greatest_rev
    # don't use 'svn info' to get greatest_rev, it doesn't work sometimes
    svn_log = get_one_svn_log_entry(source_url, "HEAD", "HEAD")
    greatest_rev = svn_log['revision']

    original_wc = "_original_wc"
    dup_wc = "_dup_wc"

    ## old working copy does not exist, disable continue mode
    if not os.path.exists(dup_wc):
        options.cont_from_break = False

    if not options.cont_from_break:
        # Warn if Target SVN URL existed
        cmd = find_program("svn")
        pipe = Popen([cmd] + ["list"] + [target_url], executable=cmd,
                     stdout=PIPE, stderr=PIPE)
        out = pipe.communicate()[0]
        if pipe.returncode == 0:
            print("Target SVN URL: %s existed!" % (target_url,))
            if out:
                print(out)
            print("Press 'Enter' to Continue, 'Ctrl + C' to Cancel...")
            print("(Timeout in 5 seconds)")
            # Wait for input on stdin, for at most five seconds.
            select.select([sys.stdin], [], [], 5)

        # Get log entry for the SVN revision we will check out
        if options.svn_rev:
            # If specify a rev, get log entry just before or at rev
            svn_start_log = get_last_svn_log_entry(source_url, 1,
                                                   options.svn_rev)
        else:
            # Otherwise, get log entry of branch creation
            svn_start_log = get_first_svn_log_entry(source_url, 1,
                                                    greatest_rev)

        # This is the revision we will checkout from
        svn_rev = svn_start_log['revision']

        # Check out first revision (changeset) from Source SVN URL
        if os.path.exists(original_wc):
            shutil.rmtree(original_wc)
        svn_checkout(source_url, original_wc, svn_rev)

        # Import first revision (changeset) into Target SVN URL
        timestamp = int(svn_start_log['date'])
        svn_date = str(datetime.fromtimestamp(timestamp))
        # The author may be None (some revisions have none); then neither
        # --username nor an "Author:" line is emitted.
        author = svn_start_log['author']
        import_message = svn_start_log['message'] + "\nDate: " + svn_date
        if author and not keep_author:
            import_message += "\nAuthor: " + author
        import_args = ["import", original_wc, target_url, "-m", import_message]
        if author and keep_author:
            import_args += ["--username", author]
        run_svn(import_args)

        # Check out a working copy
        if os.path.exists(dup_wc):
            shutil.rmtree(dup_wc)
        svn_checkout(target_url, dup_wc)

    original_wc = os.path.abspath(original_wc)
    dup_wc = os.path.abspath(dup_wc)
    # The svn commands issued from here on use paths relative to the
    # target working copy.
    os.chdir(dup_wc)

    # Get SVN info
    svn_info = get_svn_info(original_wc)
    # e.g. u'svn://svn.twistedmatrix.com/svn/Twisted'
    repos_url = svn_info['repos_url']
    # e.g. u'svn://svn.twistedmatrix.com/svn/Twisted/branches/xmpp'
    svn_url = svn_info['url']
    assert svn_url.startswith(repos_url)
    # e.g. u'/branches/xmpp'
    svn_path = svn_url[len(repos_url):]

    if options.cont_from_break:
        # Resume one revision before the one the source working copy is at.
        svn_rev = svn_info['revision'] - 1
        if svn_rev < 1:
            svn_rev = 1

    # Load SVN log starting from svn_rev + 1
    it_log_entries = iter_svn_log_entries(svn_url, svn_rev + 1, greatest_rev)

    try:
        for log_entry in it_log_entries:
            pull_svn_rev(log_entry, svn_url, target_url, svn_path,
                         original_wc, keep_author)

    except KeyboardInterrupt:
        print("\nStopped by user.")
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
    except Exception:
        # Top-level boundary: report the failure and leave the working
        # copy in a clean state.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
    finally:
        run_svn(["up"])
        print("\nFinished!")
629
630
# Run the replication only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
633