]>
Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn.py
5 Replicate changesets from one SVN repository to another,
6 includes diffs, comments, and Dates of each revision.
7 It's also possible to retain the Author info if the Target SVN URL
8 is in a local filesystem (ie, running svn2svn.py on Target SVN server),
9 or if Target SVN URL is managed through ssh tunnel.
10 In the latter case, please run 'ssh-add' (adds RSA or DSA identities to
11 the authentication agent) before invoking svn2svn.py.
13 For example (in Unix environment):
14 $ exec /usr/bin/ssh-agent $SHELL
16 Enter passphrase for /home/user/.ssh/id_dsa:
17 Identity added: /home/user/.ssh/id_dsa (/home/user/.ssh/id_dsa)
18 $ python ./svn2svn.py -a SOURCE TARGET
20 Written and used on Ubuntu 7.04 (Feisty Fawn).
21 Provided as-is and absolutely no warranty - aka Don't bet your life on it.
23 This tool re-used some modules from svnclient.py on project hgsvn
24 (a tool can create Mercurial repository from SVN repository):
25 http://cheeseshop.python.org/pypi/hgsvn
27 License: GPLv2, the same as hgsvn.
29 version 0.1.1; Jul 31, 2007; simford dot dong at gmail dot com
41 from optparse
import OptionParser
42 from subprocess
import Popen
, PIPE
43 from datetime
import datetime
46 from xml
.etree
import cElementTree
as ET
49 from xml
.etree
import ElementTree
as ET
52 import cElementTree
as ET
54 from elementtree
import ElementTree
as ET
# Base argument lists for the svn subcommands this script invokes. Callers
# concatenate their own arguments onto these before handing them to run_svn().
svn_log_args = ['log', '--xml', '-v']
svn_info_args = ['info', '--xml']
svn_checkout_args = ['checkout', '-q']
svn_status_args = ['status', '--xml', '-v', '--ignore-externals']
61 # define exception class
class ExternalCommandFailed(RuntimeError):
    """
    An external command failed.
    """
class ParameterError(RuntimeError):
    """
    A supplied parameter was invalid.

    NOTE(review): the previous docstring was a copy of
    ExternalCommandFailed's; confirm the intended meaning with the callers
    that raise this exception.
    """
72 def display_error(message
, raise_exception
= True):
74 Display error message, then terminate.
76 print "Error:", message
79 raise ExternalCommandFailed
83 # Windows compatibility code by Bill Baxter
85 def find_program(name
):
87 Find the name of the program for Popen.
88 Windows is finnicky about having the complete file name. Popen
89 won't search the %PATH% for you automatically.
90 (Adapted from ctypes.find_library)
92 # See MSDN for the REAL search order.
93 base
, ext
= os
.path
.splitext(name
)
97 exts
= ['.bat', '.exe']
98 for directory
in os
.environ
['PATH'].split(os
.pathsep
):
100 fname
= os
.path
.join(directory
, base
+ e
)
101 if os
.path
.exists(fname
):
105 def find_program(name
):
107 Find the name of the program for Popen.
108 On Unix, popen isn't picky about having absolute paths.
117 return q
+ s
.replace('\\', '\\\\').replace("'", "'\"'\"'") + q
# Fallback encoding for command-line arguments when run_svn() is called
# with a false-y `encoding`.
locale_encoding = locale.getpreferredencoding()
def run_svn(args, fail_if_stderr=False, encoding="utf-8"):
    """
    Run the svn command-line client and return what it wrote to stdout.

    args           -- sequence of arguments placed after the "svn" program
                      name; non-string items are converted with str().
    fail_if_stderr -- when true, any non-blank stderr output is treated as
                      a failure even if the return code is 0.
    encoding       -- encoding applied to unicode arguments; a false-y
                      value falls back to locale_encoding.

    On failure the error is reported through display_error().
    """
    def _transform_arg(a):
        # Popen is handed plain strings: encode unicode, stringify the rest.
        if isinstance(a, unicode):
            a = a.encode(encoding or locale_encoding)
        elif not isinstance(a, str):
            # NOTE(review): restored line (elided in the reviewed copy) - confirm.
            a = str(a)
        return a
    # A real list (not a lazy map object) so it can be concatenated below.
    t_args = [_transform_arg(a) for a in args]

    cmd = find_program("svn")
    # Shell-quoted rendering used only for display and error messages.
    cmd_string = str(" ".join(map(shell_quote, [cmd] + t_args)))
    print("* %s" % cmd_string)
    pipe = Popen([cmd] + t_args, executable=cmd, stdout=PIPE, stderr=PIPE)
    out, err = pipe.communicate()
    if pipe.returncode != 0 or (fail_if_stderr and err.strip()):
        display_error("External program failed (return code %d): %s\n%s"
            % (pipe.returncode, cmd_string, err))
    # NOTE(review): restored line - callers consume the captured stdout.
    return out
def svn_date_to_timestamp(svn_date):
    """
    Parse an SVN date as read from the XML output and
    return the corresponding timestamp.

    Everything from the first '.' onwards (fractional seconds and the
    timezone suffix) is discarded, and the remaining fields are converted
    with calendar.timegm(), i.e. interpreted as UTC.
    """
    # Strip microseconds and timezone (always UTC, hopefully)
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    # A date without a fractional part keeps its trailing "Z" after the
    # split, which strptime() would reject, so strip that too.
    date = svn_date.split('.', 1)[0].rstrip('Z')
    time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(time_tuple)
156 def parse_svn_info_xml(xml_string
):
158 Parse the XML output from an "svn info" command and extract
159 useful information as a dict.
162 tree
= ET
.fromstring(xml_string
)
163 entry
= tree
.find('.//entry')
165 d
['url'] = entry
.find('url').text
166 d
['revision'] = int(entry
.get('revision'))
167 d
['repos_url'] = tree
.find('.//repository/root').text
168 d
['last_changed_rev'] = int(tree
.find('.//commit').get('revision'))
169 d
['kind'] = entry
.get('kind')
172 def parse_svn_log_xml(xml_string
):
174 Parse the XML output from an "svn log" command and extract
175 useful information as a list of dicts (one per log changeset).
178 tree
= ET
.fromstring(xml_string
)
179 for entry
in tree
.findall('logentry'):
181 d
['revision'] = int(entry
.get('revision'))
182 # Some revisions don't have authors, most notably
183 # the first revision in a repository.
184 author
= entry
.find('author')
185 d
['author'] = author
is not None and author
.text
or None
186 d
['date'] = svn_date_to_timestamp(entry
.find('date').text
)
187 # Some revisions may have empty commit message
188 message
= entry
.find('msg')
189 message
= message
is not None and message
.text
is not None \
190 and message
.text
.strip() or ""
191 # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
192 d
['message'] = message
.replace('\r\n', '\n').replace('\n\r', '\n'). \
194 paths
= d
['changed_paths'] = []
195 for path
in entry
.findall('.//path'):
196 copyfrom_rev
= path
.get('copyfrom-rev')
198 copyfrom_rev
= int(copyfrom_rev
)
201 'action': path
.get('action'),
202 'copyfrom_path': path
.get('copyfrom-path'),
203 'copyfrom_revision': copyfrom_rev
,
208 def parse_svn_status_xml(xml_string
, base_dir
=None):
210 Parse the XML output from an "svn status" command and extract
211 useful info as a list of dicts (one per status entry).
214 tree
= ET
.fromstring(xml_string
)
215 for entry
in tree
.findall('.//entry'):
217 path
= entry
.get('path')
218 if base_dir
is not None:
219 assert path
.startswith(base_dir
)
220 path
= path
[len(base_dir
):].lstrip('/\\')
222 wc_status
= entry
.find('wc-status')
223 if wc_status
.get('item') == 'external':
224 d
['type'] = 'external'
225 elif wc_status
.get('revision') is not None:
228 d
['type'] = 'unversioned'
232 def get_svn_info(svn_url_or_wc
, rev_number
=None):
234 Get SVN information for the given URL or working copy,
235 with an optionally specified revision number.
236 Returns a dict as created by parse_svn_info_xml().
238 if rev_number
is not None:
239 args
= [svn_url_or_wc
+ "@" + str(rev_number
)]
241 args
= [svn_url_or_wc
]
242 xml_string
= run_svn(svn_info_args
+ args
,
244 return parse_svn_info_xml(xml_string
)
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Checkout the given URL at an optional revision number.

    svn_url      -- URL to check out.
    checkout_dir -- directory that receives the working copy.
    rev_number   -- revision passed to "-r"; omitted when None.

    Returns whatever run_svn() returns for the checkout command.
    """
    # NOTE(review): initialisation restored (elided in the reviewed copy);
    # the "+=" below requires args to exist already.
    args = []
    if rev_number is not None:
        args += ['-r', rev_number]
    args += [svn_url, checkout_dir]
    return run_svn(svn_checkout_args + args)
def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False):
    """
    Fetch up to 'limit' SVN log entries between the given revisions.

    svn_url_or_wc -- URL or working copy path handed to "svn log".
    rev_start     -- first revision of the "-r START:END" range.
    rev_end       -- second revision of the range.
    limit         -- value passed to "--limit".
    stop_on_copy  -- when true, "--stop-on-copy" is added to the command.

    Returns the list of dicts produced by parse_svn_log_xml().
    """
    # NOTE(review): the if/else scaffolding was elided in the reviewed copy
    # and is restored here - confirm.
    if stop_on_copy:
        args = ['--stop-on-copy']
    else:
        args = []
    args += ['-r', '%s:%s' % (rev_start, rev_end), '--limit',
             str(limit), svn_url_or_wc]
    xml_string = run_svn(svn_log_args + args)
    return parse_svn_log_xml(xml_string)
def get_svn_status(svn_wc):
    """
    Get SVN status information about the given working copy.

    Returns the list of dicts produced by parse_svn_status_xml(), which is
    given the absolute working copy path as its base directory.
    """
    # Ensure proper stripping by canonicalizing the path
    svn_wc = os.path.abspath(svn_wc)
    # NOTE(review): assignment restored (elided in the reviewed copy);
    # `args` is otherwise undefined in the call below - confirm.
    args = [svn_wc]
    xml_string = run_svn(svn_status_args + args)
    return parse_svn_status_xml(xml_string, svn_wc)
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False):
    """
    Get the first SVN log entry in the requested revision range.

    Requests a single entry (limit 1) from run_svn_log() for the range
    rev_start:rev_end and returns it. An empty result is reported through
    display_error().
    """
    entries = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy)
    # NOTE(review): guard and return restored (elided in the reviewed copy).
    if not entries:
        display_error("No SVN log for %s between revisions %s and %s" %
                      (svn_url, rev_start, rev_end))

    return entries[0]
def get_first_svn_log_entry(svn_url, rev_start, rev_end):
    """
    Get the first log entry after/at the given revision number in an SVN branch.

    The range rev_start:rev_end is queried with --stop-on-copy and a limit
    of one entry; both revisions must be supplied by the caller (there are
    no defaults). Starting from the branch's earliest revision yields the
    log entry corresponding to the branch creation.

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True)
def get_last_svn_log_entry(svn_url, rev_start, rev_end):
    """
    Get the last log entry before/at the given revision number in an SVN branch.

    The range is queried in reverse (rev_end:rev_start) with --stop-on-copy
    and a limit of one entry, so the entry returned is the one svn lists
    first when walking from rev_end towards rev_start. Both revisions must
    be supplied by the caller (there are no defaults); passing "HEAD" as
    rev_end gives the latest commit in the branch.
    """
    return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True)
# Tuning knobs for chunked log fetching in iter_svn_log_entries():
# request durations are compared against the threshold (same unit as
# time.time() differences, i.e. seconds) to grow or shrink the chunk length,
# which never drops below the minimum.
log_duration_threshold = 10.0
log_min_chunk_length = 10
314 def iter_svn_log_entries(svn_url
, first_rev
, last_rev
):
316 Iterate over SVN log entries between first_rev and last_rev.
318 This function features chunked log fetching so that it isn't too nasty
319 to the SVN server if many entries are requested.
322 chunk_length
= log_min_chunk_length
323 chunk_interval_factor
= 1.0
324 while last_rev
== "HEAD" or cur_rev
<= last_rev
:
325 start_t
= time
.time()
326 stop_rev
= min(last_rev
, cur_rev
+ int(chunk_length
* chunk_interval_factor
))
327 entries
= run_svn_log(svn_url
, cur_rev
, stop_rev
, chunk_length
)
328 duration
= time
.time() - start_t
330 if stop_rev
== last_rev
:
332 cur_rev
= stop_rev
+ 1
333 chunk_interval_factor
*= 2.0
337 cur_rev
= e
['revision'] + 1
338 # Adapt chunk length based on measured request duration
339 if duration
< log_duration_threshold
:
340 chunk_length
= int(chunk_length
* 2.0)
341 elif duration
> log_duration_threshold
* 2:
342 chunk_length
= max(log_min_chunk_length
, int(chunk_length
/ 2.0))
344 def commit_from_svn_log_entry(entry
, files
=None, keep_author
=False):
346 Given an SVN log entry and an optional sequence of files, do an svn commit.
348 # This will use the local timezone for displaying commit times
349 timestamp
= int(entry
['date'])
350 svn_date
= str(datetime
.fromtimestamp(timestamp
))
351 # Uncomment this one one if you prefer UTC commit times
352 #svn_date = "%d 0" % timestamp
354 options
= ["ci", "--force-log", "-m", entry
['message'] + "\nDate: " + svn_date
, "--username", entry
['author']]
356 options
= ["ci", "--force-log", "-m", entry
['message'] + "\nDate: " + svn_date
+ "\nAuthor: " + entry
['author']]
358 options
+= list(files
)
362 # set p = "." when p = ""
363 #p = p.strip() or "."
364 if p
.strip() and not os
.path
.exists(p
+ os
.sep
+ ".svn"):
365 svn_add_dir(os
.path
.dirname(p
))
366 if not os
.path
.exists(p
):
370 def pull_svn_rev(log_entry
, svn_url
, target_url
, svn_path
, original_wc
, keep_author
=False):
372 Pull SVN changes from the given log entry.
373 Returns the new SVN revision.
374 If an exception occurs, it will rollback to revision 'svn_rev - 1'.
376 svn_rev
= log_entry
['revision']
377 run_svn(["up", "--ignore-externals", "-r", svn_rev
, original_wc
])
383 for d
in log_entry
['changed_paths']:
384 # e.g. u'/branches/xmpp/twisted/words/test/test.py'
386 if not p
.startswith(svn_path
+ "/"):
387 # Ignore changed files that are not part of this subdir
389 unrelated_paths
.append(p
)
391 # e.g. u'twisted/words/test/test.py'
392 p
= p
[len(svn_path
):].strip("/")
395 if action
not in 'MARD':
396 display_error("In SVN rev. %d: action '%s' not supported. \
397 Please report a bug!" % (svn_rev
, action
))
399 if len (commit_paths
) < 100:
400 commit_paths
.append(p
)
401 # Detect special cases
402 old_p
= d
['copyfrom_path']
403 if old_p
and old_p
.startswith(svn_path
+ "/"):
404 old_p
= old_p
[len(svn_path
):].strip("/")
405 # Both paths can be identical if copied from an old rev.
406 # We treat like it a normal change.
408 if not os
.path
.exists(p
+ os
.sep
+ '.svn'):
409 svn_add_dir(os
.path
.dirname(p
))
410 run_svn(["up", old_p
])
411 run_svn(["copy", old_p
, p
])
412 if os
.path
.isfile(p
):
413 shutil
.copy(original_wc
+ os
.sep
+ p
, p
)
415 removed_paths
.append(old_p
)
416 if len (commit_paths
) < 100:
417 commit_paths
.append(old_p
)
420 if os
.path
.isdir(original_wc
+ os
.sep
+ p
):
423 p_path
= os
.path
.dirname(p
).strip() or '.'
425 shutil
.copy(original_wc
+ os
.sep
+ p
, p
)
428 removed_paths
.append(p
)
429 else: # action == 'M'
430 merged_paths
.append(p
)
433 for r
in removed_paths
:
435 run_svn(["remove", "--force", r
])
438 for m
in merged_paths
:
440 m_url
= svn_url
+ "/" + m
441 out
= run_svn(["merge", "-c", str(svn_rev
), "--non-recursive",
442 "--non-interactive", "--accept=theirs-full",
443 m_url
+"@"+str(svn_rev
), m
])
444 # if conflicts, use the copy from original_wc
445 if out
and out
.split()[0] == 'C':
446 print "\n### Conflicts ignored: %s, in revision: %s\n" \
448 run_svn(["revert", "--recursive", m
])
449 if os
.path
.isfile(m
):
450 shutil
.copy(original_wc
+ os
.sep
+ m
, m
)
453 print "Unrelated paths: "
454 print "*", unrelated_paths
457 if len (commit_paths
) > 99:
461 commit_from_svn_log_entry(log_entry
, commit_paths
,
462 keep_author
=keep_author
)
463 except ExternalCommandFailed
:
464 # try to ignore the Properties conflicts on files and dirs
465 # use the copy from original_wc
467 for d
in log_entry
['changed_paths']:
469 p
= p
[len(svn_path
):].strip("/")
470 if os
.path
.isfile(p
):
471 if os
.path
.isfile(p
+ ".prej"):
473 shutil
.copy(original_wc
+ os
.sep
+ p
, p
)
474 p2
=os
.sep
+ p
.replace('_', '__').replace('/', '_') \
475 + ".prej-" + str(svn_rev
)
476 shutil
.move(p
+ ".prej", os
.path
.dirname(original_wc
) + p2
)
477 w
="\n### Properties conflicts ignored:"
478 print "%s %s, in revision: %s\n" % (w
, p
, svn_rev
)
479 elif os
.path
.isdir(p
):
480 if os
.path
.isfile(p
+ os
.sep
+ "dir_conflicts.prej"):
482 p2
=os
.sep
+ p
.replace('_', '__').replace('/', '_') \
483 + "_dir__conflicts.prej-" + str(svn_rev
)
484 shutil
.move(p
+ os
.sep
+ "dir_conflicts.prej",
485 os
.path
.dirname(original_wc
) + p2
)
486 w
="\n### Properties conflicts ignored:"
487 print "%s %s, in revision: %s\n" % (w
, p
, svn_rev
)
488 out
= run_svn(["propget", "svn:ignore",
489 original_wc
+ os
.sep
+ p
])
491 run_svn(["propset", "svn:ignore", out
.strip(), p
])
492 out
= run_svn(["propget", "svn:externel",
493 original_wc
+ os
.sep
+ p
])
495 run_svn(["propset", "svn:external", out
.strip(), p
])
498 commit_from_svn_log_entry(log_entry
, commit_paths
,
499 keep_author
=keep_author
)
501 raise ExternalCommandFailed
505 usage
= "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
506 parser
= OptionParser(usage
)
507 parser
.add_option("-a", "--keep-author", action
="store_true",
508 dest
="keep_author", help="Keep revision Author or not")
509 parser
.add_option("-c", "--continue-from-break", action
="store_true",
510 dest
="cont_from_break",
511 help="Continue from previous break")
512 parser
.add_option("-r", "--svn-rev", type="int", dest
="svn_rev",
513 help="SVN revision to checkout from")
514 (options
, args
) = parser
.parse_args()
516 display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
519 source_url
= args
.pop(0).rstrip("/")
520 target_url
= args
.pop(0).rstrip("/")
521 if options
.keep_author
:
526 # Find the greatest_rev
527 # don't use 'svn info' to get greatest_rev, it doesn't work sometimes
528 svn_log
= get_one_svn_log_entry(source_url
, "HEAD", "HEAD")
529 greatest_rev
= svn_log
['revision']
531 original_wc
= "_original_wc"
534 ## old working copy does not exist, disable continue mode
535 if not os
.path
.exists(dup_wc
):
536 options
.cont_from_break
= False
538 if not options
.cont_from_break
:
539 # Warn if Target SVN URL existed
540 cmd
= find_program("svn")
541 pipe
= Popen([cmd
] + ["list"] + [target_url
], executable
=cmd
,
542 stdout
=PIPE
, stderr
=PIPE
)
543 out
, err
= pipe
.communicate()
544 if pipe
.returncode
== 0:
545 print "Target SVN URL: %s existed!" % target_url
548 print "Press 'Enter' to Continue, 'Ctrl + C' to Cancel..."
549 print "(Timeout in 5 seconds)"
550 rfds
, wfds
, efds
= select
.select([sys
.stdin
], [], [], 5)
552 # Get log entry for the SVN revision we will check out
554 # If specify a rev, get log entry just before or at rev
555 svn_start_log
= get_last_svn_log_entry(source_url
, 1,
558 # Otherwise, get log entry of branch creation
559 svn_start_log
= get_first_svn_log_entry(source_url
, 1,
562 # This is the revision we will checkout from
563 svn_rev
= svn_start_log
['revision']
565 # Check out first revision (changeset) from Source SVN URL
566 if os
.path
.exists(original_wc
):
567 shutil
.rmtree(original_wc
)
568 svn_checkout(source_url
, original_wc
, svn_rev
)
570 # Import first revision (changeset) into Target SVN URL
571 timestamp
= int(svn_start_log
['date'])
572 svn_date
= str(datetime
.fromtimestamp(timestamp
))
574 run_svn(["import", original_wc
, target_url
, "-m",
575 svn_start_log
['message'] + "\nDate: " + svn_date
,
576 "--username", svn_start_log
['author']])
578 run_svn(["import", original_wc
, target_url
, "-m",
579 svn_start_log
['message'] + "\nDate: " + svn_date
+
580 "\nAuthor: " + svn_start_log
['author']])
582 # Check out a working copy
583 if os
.path
.exists(dup_wc
):
584 shutil
.rmtree(dup_wc
)
585 svn_checkout(target_url
, dup_wc
)
587 original_wc
= os
.path
.abspath(original_wc
)
588 dup_wc
= os
.path
.abspath(dup_wc
)
592 svn_info
= get_svn_info(original_wc
)
593 # e.g. u'svn://svn.twistedmatrix.com/svn/Twisted'
594 repos_url
= svn_info
['repos_url']
595 # e.g. u'svn://svn.twistedmatrix.com/svn/Twisted/branches/xmpp'
596 svn_url
= svn_info
['url']
597 assert svn_url
.startswith(repos_url
)
598 # e.g. u'/branches/xmpp'
599 svn_path
= svn_url
[len(repos_url
):]
601 svn_branch
= svn_url
.split("/")[-1]
603 if options
.cont_from_break
:
604 svn_rev
= svn_info
['revision'] - 1
608 # Load SVN log starting from svn_rev + 1
609 it_log_entries
= iter_svn_log_entries(svn_url
, svn_rev
+ 1, greatest_rev
)
612 for log_entry
in it_log_entries
:
613 pull_svn_rev(log_entry
, svn_url
, target_url
, svn_path
,
614 original_wc
, keep_author
)
616 except KeyboardInterrupt:
617 print "\nStopped by user."
619 run_svn(["revert", "--recursive", "."])
621 print "\nCommand failed with following error:\n"
622 traceback
.print_exc()
624 run_svn(["revert", "--recursive", "."])
630 if __name__
== "__main__":