]>
Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn.py
5 Replicate changesets from one SVN repository to another,
6 includes diffs, comments, and Dates of each revision.
7 It's also possible to retain the Author info if the Target SVN URL
8 is in a local filesystem (ie, running svn2svn.py on Target SVN server),
9 or if Target SVN URL is managed through ssh tunnel.
10 In the latter case, please run 'ssh-add' (adds RSA or DSA identities to
11 the authentication agent) before invoking svn2svn.py.
13 For example (in Unix environment):
14 $ exec /usr/bin/ssh-agent $SHELL
16 Enter passphrase for /home/user/.ssh/id_dsa:
17 Identity added: /home/user/.ssh/id_dsa (/home/user/.ssh/id_dsa)
18 $ python ./svn2svn.py -a SOURCE TARGET
20 Written and used on Ubuntu 7.04 (Feisty Fawn).
21 Provided as-is and absolutely no warranty - aka Don't bet your life on it.
23 This tool reuses some code from svnclient.py in the hgsvn project
24 (a tool that can create a Mercurial repository from an SVN repository):
25 http://cheeseshop.python.org/pypi/hgsvn
27 License: GPLv2, the same as hgsvn.
29 version 0.1.1; Jul 31, 2007; simford dot dong at gmail dot com
41 from optparse
import OptionParser
42 from subprocess
import Popen
, PIPE
43 from datetime
import datetime
46 from xml
.etree
import cElementTree
as ET
49 from xml
.etree
import ElementTree
as ET
52 import cElementTree
as ET
54 from elementtree
import ElementTree
as ET
# Base argument lists for each svn subcommand used by this script. Callers
# concatenate their own options/targets onto these lists before passing the
# result to run_svn() (e.g. `svn_info_args + args`).
svn_log_args = ['log', '--xml', '-v']
svn_info_args = ['info', '--xml']
svn_checkout_args = ['checkout', '-q']
svn_status_args = ['status', '--xml', '-v', '--ignore-externals']
# define exception class
class ExternalCommandFailed(RuntimeError):
    """
    An external command failed.

    Raised by display_error() and caught in pull_svn_rev() around the
    commit step, so that a failed commit can be retried.
    """
class ParameterError(RuntimeError):
    """
    An invalid parameter was supplied.

    NOTE(review): the previous docstring read "An external command failed.",
    apparently copied from ExternalCommandFailed. No raise site is visible in
    this excerpt, so confirm the intended meaning against the callers.
    """
72 def display_error(message
, raise_exception
= True):
74 Display error message, then terminate.
76 print "Error:", message
79 raise ExternalCommandFailed
83 # Windows compatibility code by Bill Baxter
85 def find_program(name
):
87 Find the name of the program for Popen.
88 Windows is finicky about having the complete file name. Popen
89 won't search the %PATH% for you automatically.
90 (Adapted from ctypes.find_library)
92 # See MSDN for the REAL search order.
93 base
, ext
= os
.path
.splitext(name
)
97 exts
= ['.bat', '.exe']
98 for directory
in os
.environ
['PATH'].split(os
.pathsep
):
100 fname
= os
.path
.join(directory
, base
+ e
)
101 if os
.path
.exists(fname
):
105 def find_program(name
):
107 Find the name of the program for Popen.
108 On Unix, popen isn't picky about having absolute paths.
117 return q
+ s
.replace('\\', '\\\\').replace("'", "'\"'\"'") + q
119 locale_encoding
= locale
.getpreferredencoding()
121 def run_svn(args
, fail_if_stderr
=False, encoding
="utf-8"):
124 exit if svn cmd failed
126 def _transform_arg(a
):
127 if isinstance(a
, unicode):
128 a
= a
.encode(encoding
or locale_encoding
)
129 elif not isinstance(a
, str):
132 t_args
= map(_transform_arg
, args
)
134 cmd
= find_program("svn")
135 cmd_string
= str(" ".join(map(shell_quote
, [cmd
] + t_args
)))
136 print "*", cmd_string
137 pipe
= Popen([cmd
] + t_args
, executable
=cmd
, stdout
=PIPE
, stderr
=PIPE
)
138 out
, err
= pipe
.communicate()
139 if pipe
.returncode
!= 0 or (fail_if_stderr
and err
.strip()):
140 display_error("External program failed (return code %d): %s\n%s"
141 % (pipe
.returncode
, cmd_string
, err
))
def svn_date_to_timestamp(svn_date):
    """
    Parse an SVN date as read from the XML output and
    return the corresponding timestamp.

    The input is expected to look like "2007-07-31T12:34:56.123456Z".
    The fractional seconds are discarded and the remaining fields are
    interpreted as UTC via calendar.timegm(), so the result is an integer
    number of seconds since the epoch.

    Raises ValueError (from time.strptime) if the date does not match
    the "%Y-%m-%dT%H:%M:%S" layout.
    """
    # Strip microseconds and timezone (always UTC, hopefully)
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    date = svn_date.split('.', 1)[0]
    # Without fractional seconds there is no '.' to split on, so the
    # trailing UTC designator survives and would make strptime() fail.
    if date.endswith('Z'):
        date = date[:-1]
    time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(time_tuple)
156 def parse_svn_info_xml(xml_string
):
158 Parse the XML output from an "svn info" command and extract
159 useful information as a dict.
162 tree
= ET
.fromstring(xml_string
)
163 entry
= tree
.find('.//entry')
165 d
['url'] = entry
.find('url').text
166 d
['revision'] = int(entry
.get('revision'))
167 d
['repos_url'] = tree
.find('.//repository/root').text
168 d
['last_changed_rev'] = int(tree
.find('.//commit').get('revision'))
169 d
['kind'] = entry
.get('kind')
172 def parse_svn_log_xml(xml_string
):
174 Parse the XML output from an "svn log" command and extract
175 useful information as a list of dicts (one per log changeset).
178 tree
= ET
.fromstring(xml_string
)
179 for entry
in tree
.findall('logentry'):
181 d
['revision'] = int(entry
.get('revision'))
182 # Some revisions don't have authors, most notably
183 # the first revision in a repository.
184 author
= entry
.find('author')
185 d
['author'] = author
is not None and author
.text
or None
186 d
['date'] = svn_date_to_timestamp(entry
.find('date').text
)
187 # Some revisions may have empty commit message
188 message
= entry
.find('msg')
189 message
= message
is not None and message
.text
is not None \
190 and message
.text
.strip() or ""
191 # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
192 d
['message'] = message
.replace('\r\n', '\n').replace('\n\r', '\n'). \
194 paths
= d
['changed_paths'] = []
195 for path
in entry
.findall('.//path'):
196 copyfrom_rev
= path
.get('copyfrom-rev')
198 copyfrom_rev
= int(copyfrom_rev
)
201 'action': path
.get('action'),
202 'copyfrom_path': path
.get('copyfrom-path'),
203 'copyfrom_revision': copyfrom_rev
,
208 def parse_svn_status_xml(xml_string
, base_dir
=None):
210 Parse the XML output from an "svn status" command and extract
211 useful info as a list of dicts (one per status entry).
214 tree
= ET
.fromstring(xml_string
)
215 for entry
in tree
.findall('.//entry'):
217 path
= entry
.get('path')
218 if base_dir
is not None:
219 assert path
.startswith(base_dir
)
220 path
= path
[len(base_dir
):].lstrip('/\\')
222 wc_status
= entry
.find('wc-status')
223 if wc_status
.get('item') == 'external':
224 d
['type'] = 'external'
225 elif wc_status
.get('revision') is not None:
228 d
['type'] = 'unversioned'
232 def get_svn_info(svn_url_or_wc
, rev_number
=None):
234 Get SVN information for the given URL or working copy,
235 with an optionally specified revision number.
236 Returns a dict as created by parse_svn_info_xml().
238 if rev_number
is not None:
239 args
= [svn_url_or_wc
+ "@" + str(rev_number
)]
241 args
= [svn_url_or_wc
]
242 xml_string
= run_svn(svn_info_args
+ args
,
244 return parse_svn_info_xml(xml_string
)
246 def svn_checkout(svn_url
, checkout_dir
, rev_number
=None):
248 Checkout the given URL at an optional revision number.
251 if rev_number
is not None:
252 args
+= ['-r', rev_number
]
253 args
+= [svn_url
, checkout_dir
]
254 return run_svn(svn_checkout_args
+ args
)
256 def run_svn_log(svn_url_or_wc
, rev_start
, rev_end
, limit
, stop_on_copy
=False):
258 Fetch up to 'limit' SVN log entries between the given revisions.
261 args
= ['--stop-on-copy']
264 if rev_start
!= 'HEAD' and rev_end
!= 'HEAD':
265 args
+= ['-r', '%s:%s' % (rev_start
, rev_end
)]
266 args
+= ['--limit', str(limit
), svn_url_or_wc
]
267 xml_string
= run_svn(svn_log_args
+ args
)
268 return parse_svn_log_xml(xml_string
)
270 def get_svn_status(svn_wc
, flags
=None):
272 Get SVN status information about the given working copy.
274 # Ensure proper stripping by canonicalizing the path
275 svn_wc
= os
.path
.abspath(svn_wc
)
280 xml_string
= run_svn(svn_status_args
+ args
)
281 return parse_svn_status_xml(xml_string
, svn_wc
)
283 def get_one_svn_log_entry(svn_url
, rev_start
, rev_end
, stop_on_copy
=False):
285 Get the first SVN log entry in the requested revision range.
287 entries
= run_svn_log(svn_url
, rev_start
, rev_end
, 1, stop_on_copy
)
289 display_error("No SVN log for %s between revisions %s and %s" %
290 (svn_url
, rev_start
, rev_end
))
def get_first_svn_log_entry(svn_url, rev_start, rev_end):
    """
    Get the first log entry after/at the given revision number in an SVN branch.

    The revision range must be passed explicitly (there are no default
    values). The lookup is delegated to get_one_svn_log_entry() with
    stop_on_copy=True; per the original author's note, a rev_start at or
    before the branch's first revision gives the log entry corresponding
    to the branch creation.

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    return get_one_svn_log_entry(svn_url, rev_start, rev_end,
                                 stop_on_copy=True)
def get_last_svn_log_entry(svn_url, rev_start, rev_end):
    """
    Get the last log entry before/at the given revision number in an SVN branch.

    The revision range must be passed explicitly (there are no default
    values); per the original author's note, passing "HEAD" as rev_end
    gives the log entry corresponding to the latest commit in the branch.
    """
    # The range is deliberately passed reversed (rev_end first) so that the
    # single entry fetched by get_one_svn_log_entry() is the newest one.
    return get_one_svn_log_entry(svn_url, rev_end, rev_start,
                                 stop_on_copy=True)
315 log_duration_threshold
= 10.0
316 log_min_chunk_length
= 10
318 def iter_svn_log_entries(svn_url
, first_rev
, last_rev
):
320 Iterate over SVN log entries between first_rev and last_rev.
322 This function features chunked log fetching so that it isn't too nasty
323 to the SVN server if many entries are requested.
326 chunk_length
= log_min_chunk_length
327 chunk_interval_factor
= 1.0
328 while last_rev
== "HEAD" or cur_rev
<= last_rev
:
329 start_t
= time
.time()
330 stop_rev
= min(last_rev
, cur_rev
+ int(chunk_length
* chunk_interval_factor
))
331 entries
= run_svn_log(svn_url
, cur_rev
, stop_rev
, chunk_length
)
332 duration
= time
.time() - start_t
334 if stop_rev
== last_rev
:
336 cur_rev
= stop_rev
+ 1
337 chunk_interval_factor
*= 2.0
341 cur_rev
= e
['revision'] + 1
342 # Adapt chunk length based on measured request duration
343 if duration
< log_duration_threshold
:
344 chunk_length
= int(chunk_length
* 2.0)
345 elif duration
> log_duration_threshold
* 2:
346 chunk_length
= max(log_min_chunk_length
, int(chunk_length
/ 2.0))
348 def commit_from_svn_log_entry(entry
, files
=None, keep_author
=False):
350 Given an SVN log entry and an optional sequence of files, do an svn commit.
352 # This will use the local timezone for displaying commit times
353 timestamp
= int(entry
['date'])
354 svn_date
= str(datetime
.fromtimestamp(timestamp
))
355 # Uncomment this one if you prefer UTC commit times
356 #svn_date = "%d 0" % timestamp
358 options
= ["ci", "--force-log", "-m", entry
['message'] + "\nDate: " + svn_date
, "--username", entry
['author']]
360 options
= ["ci", "--force-log", "-m", entry
['message'] + "\nDate: " + svn_date
+ "\nAuthor: " + entry
['author']]
362 options
+= list(files
)
366 entries
= get_svn_status(p
)
370 return (d
['type'] == 'normal')
373 # set p = "." when p = ""
374 #p = p.strip() or "."
375 if p
.strip() and not in_svn(p
):
376 svn_add_dir(os
.path
.dirname(p
))
377 if not os
.path
.exists(p
):
381 def pull_svn_rev(log_entry
, svn_url
, target_url
, svn_path
, original_wc
, keep_author
=False):
383 Pull SVN changes from the given log entry.
384 Returns the new SVN revision.
385 If an exception occurs, it will rollback to revision 'svn_rev - 1'.
387 svn_rev
= log_entry
['revision']
388 run_svn(["up", "--ignore-externals", "-r", svn_rev
, original_wc
])
394 for d
in log_entry
['changed_paths']:
395 # e.g. u'/branches/xmpp/twisted/words/test/test.py'
397 if not p
.startswith(svn_path
+ "/"):
398 # Ignore changed files that are not part of this subdir
400 unrelated_paths
.append(p
)
402 # e.g. u'twisted/words/test/test.py'
403 p
= p
[len(svn_path
):].strip("/")
406 if action
not in 'MARD':
407 display_error("In SVN rev. %d: action '%s' not supported. \
408 Please report a bug!" % (svn_rev
, action
))
410 if len (commit_paths
) < 100:
411 commit_paths
.append(p
)
412 # Detect special cases
413 old_p
= d
['copyfrom_path']
414 if old_p
and old_p
.startswith(svn_path
+ "/"):
415 old_p
= old_p
[len(svn_path
):].strip("/")
416 # Both paths can be identical if copied from an old rev.
417 # We treat it like a normal change.
420 svn_add_dir(os
.path
.dirname(p
))
421 run_svn(["up", old_p
])
422 run_svn(["copy", old_p
, p
])
423 if os
.path
.isfile(p
):
424 shutil
.copy(original_wc
+ os
.sep
+ p
, p
)
426 removed_paths
.append(old_p
)
427 if len (commit_paths
) < 100:
428 commit_paths
.append(old_p
)
431 if os
.path
.isdir(original_wc
+ os
.sep
+ p
):
434 p_path
= os
.path
.dirname(p
).strip() or '.'
436 shutil
.copy(original_wc
+ os
.sep
+ p
, p
)
439 removed_paths
.append(p
)
440 else: # action == 'M'
441 merged_paths
.append(p
)
444 for r
in removed_paths
:
446 run_svn(["remove", "--force", r
])
449 for m
in merged_paths
:
451 m_url
= svn_url
+ "/" + m
452 out
= run_svn(["merge", "-c", str(svn_rev
), "--non-recursive",
453 "--non-interactive", "--accept=theirs-full",
454 m_url
+"@"+str(svn_rev
), m
])
455 # if conflicts, use the copy from original_wc
456 if out
and out
.split()[0] == 'C':
457 print "\n### Conflicts ignored: %s, in revision: %s\n" \
459 run_svn(["revert", "--recursive", m
])
460 if os
.path
.isfile(m
):
461 shutil
.copy(original_wc
+ os
.sep
+ m
, m
)
464 print "Unrelated paths: "
465 print "*", unrelated_paths
468 if len (commit_paths
) > 99:
472 commit_from_svn_log_entry(log_entry
, commit_paths
,
473 keep_author
=keep_author
)
474 except ExternalCommandFailed
:
475 # try to ignore the Properties conflicts on files and dirs
476 # use the copy from original_wc
478 for d
in log_entry
['changed_paths']:
480 p
= p
[len(svn_path
):].strip("/")
481 if os
.path
.isfile(p
):
482 if os
.path
.isfile(p
+ ".prej"):
484 shutil
.copy(original_wc
+ os
.sep
+ p
, p
)
485 p2
=os
.sep
+ p
.replace('_', '__').replace('/', '_') \
486 + ".prej-" + str(svn_rev
)
487 shutil
.move(p
+ ".prej", os
.path
.dirname(original_wc
) + p2
)
488 w
="\n### Properties conflicts ignored:"
489 print "%s %s, in revision: %s\n" % (w
, p
, svn_rev
)
490 elif os
.path
.isdir(p
):
491 if os
.path
.isfile(p
+ os
.sep
+ "dir_conflicts.prej"):
493 p2
=os
.sep
+ p
.replace('_', '__').replace('/', '_') \
494 + "_dir__conflicts.prej-" + str(svn_rev
)
495 shutil
.move(p
+ os
.sep
+ "dir_conflicts.prej",
496 os
.path
.dirname(original_wc
) + p2
)
497 w
="\n### Properties conflicts ignored:"
498 print "%s %s, in revision: %s\n" % (w
, p
, svn_rev
)
499 out
= run_svn(["propget", "svn:ignore",
500 original_wc
+ os
.sep
+ p
])
502 run_svn(["propset", "svn:ignore", out
.strip(), p
])
503 out
= run_svn(["propget", "svn:externel",
504 original_wc
+ os
.sep
+ p
])
506 run_svn(["propset", "svn:external", out
.strip(), p
])
509 commit_from_svn_log_entry(log_entry
, commit_paths
,
510 keep_author
=keep_author
)
512 raise ExternalCommandFailed
516 usage
= "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
517 parser
= OptionParser(usage
)
518 parser
.add_option("-a", "--keep-author", action
="store_true",
519 dest
="keep_author", help="Keep revision Author or not")
520 parser
.add_option("-c", "--continue-from-break", action
="store_true",
521 dest
="cont_from_break",
522 help="Continue from previous break")
523 parser
.add_option("-r", "--svn-rev", type="int", dest
="svn_rev",
524 help="SVN revision to checkout from")
525 (options
, args
) = parser
.parse_args()
527 display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
530 source_url
= args
.pop(0).rstrip("/")
531 target_url
= args
.pop(0).rstrip("/")
532 if options
.keep_author
:
537 # Find the greatest_rev
538 # don't use 'svn info' to get greatest_rev, it doesn't work sometimes
539 svn_log
= get_one_svn_log_entry(source_url
, "HEAD", "HEAD")
540 greatest_rev
= svn_log
['revision']
542 original_wc
= "_original_wc"
545 ## old working copy does not exist, disable continue mode
546 if not os
.path
.exists(dup_wc
):
547 options
.cont_from_break
= False
549 if not options
.cont_from_break
:
550 # Warn if the Target SVN URL already exists
551 cmd
= find_program("svn")
552 pipe
= Popen([cmd
] + ["list"] + [target_url
], executable
=cmd
,
553 stdout
=PIPE
, stderr
=PIPE
)
554 out
, err
= pipe
.communicate()
555 if pipe
.returncode
== 0:
556 print "Target SVN URL: %s existed!" % target_url
559 print "Press 'Enter' to Continue, 'Ctrl + C' to Cancel..."
560 print "(Timeout in 5 seconds)"
561 rfds
, wfds
, efds
= select
.select([sys
.stdin
], [], [], 5)
563 # Get log entry for the SVN revision we will check out
565 # If a rev is specified, get the log entry just before or at that rev
566 svn_start_log
= get_last_svn_log_entry(source_url
, 1,
569 # Otherwise, get log entry of branch creation
570 svn_start_log
= get_first_svn_log_entry(source_url
, 1,
573 # This is the revision we will checkout from
574 svn_rev
= svn_start_log
['revision']
576 # Check out first revision (changeset) from Source SVN URL
577 if os
.path
.exists(original_wc
):
578 shutil
.rmtree(original_wc
)
579 svn_checkout(source_url
, original_wc
, svn_rev
)
581 # Import first revision (changeset) into Target SVN URL
582 timestamp
= int(svn_start_log
['date'])
583 svn_date
= str(datetime
.fromtimestamp(timestamp
))
585 run_svn(["import", original_wc
, target_url
, "-m",
586 svn_start_log
['message'] + "\nDate: " + svn_date
,
587 "--username", svn_start_log
['author']])
589 run_svn(["import", original_wc
, target_url
, "-m",
590 svn_start_log
['message'] + "\nDate: " + svn_date
+
591 "\nAuthor: " + svn_start_log
['author']])
593 # Check out a working copy
594 if os
.path
.exists(dup_wc
):
595 shutil
.rmtree(dup_wc
)
596 svn_checkout(target_url
, dup_wc
)
598 original_wc
= os
.path
.abspath(original_wc
)
599 dup_wc
= os
.path
.abspath(dup_wc
)
603 svn_info
= get_svn_info(original_wc
)
604 # e.g. u'svn://svn.twistedmatrix.com/svn/Twisted'
605 repos_url
= svn_info
['repos_url']
606 # e.g. u'svn://svn.twistedmatrix.com/svn/Twisted/branches/xmpp'
607 svn_url
= svn_info
['url']
608 assert svn_url
.startswith(repos_url
)
609 # e.g. u'/branches/xmpp'
610 svn_path
= svn_url
[len(repos_url
):]
612 svn_branch
= svn_url
.split("/")[-1]
614 if options
.cont_from_break
:
615 svn_rev
= svn_info
['revision'] - 1
619 # Load SVN log starting from svn_rev + 1
620 it_log_entries
= iter_svn_log_entries(svn_url
, svn_rev
+ 1, greatest_rev
)
623 for log_entry
in it_log_entries
:
624 pull_svn_rev(log_entry
, svn_url
, target_url
, svn_path
,
625 original_wc
, keep_author
)
627 except KeyboardInterrupt:
628 print "\nStopped by user."
630 run_svn(["revert", "--recursive", "."])
632 print "\nCommand failed with following error:\n"
633 traceback
.print_exc()
635 run_svn(["revert", "--recursive", "."])
641 if __name__
== "__main__":