5 Replicate (replay) changesets from one SVN repository to another:
6 * Maintains full logical history (e.g. uses "svn copy" for renames).
7 * Maintains original commit messages.
8 * Cannot maintain original commit date, but appends original commit date
9 for each commit message: "Date: %d".
10 * Optionally maintain source author info. (Only supported if accessing
11 target SVN repo via file://)
12 * Optionally run an external shell script before each replayed commit
13 to give the ability to dynamically exclude or modify files as part
16 License: GPLv2, the same as hgsvn.
17 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
18 (This is a forked and modified version of http://code.google.com/p/svn2svn/)
30 from optparse
import OptionParser
31 from subprocess
import Popen
, PIPE
32 from datetime
import datetime
35 from xml
.etree
import cElementTree
as ET
38 from xml
.etree
import ElementTree
as ET
41 import cElementTree
as ET
43 from elementtree
import ElementTree
as ET
# Leading arguments for each "svn" subcommand this script runs; callers
# append their own arguments with list concatenation.
svn_log_args = ['log', '--xml']
svn_info_args = ['info', '--xml']
svn_checkout_args = ['checkout', '-q']
svn_status_args = ['status', '--xml', '-v', '--ignore-externals']

# Display how long each "svn" OS command took to run?
debug_runsvn_timing = False

# Setup verbosity options
# Display every "svn" OS command we run?
runsvn_showcmd = False
# Display the stdout results from every "svn" OS command we run?
runsvn_showout = False
# Display each action + changed-path as we walk the history?
svnlog_verbose = True
# define exception class
class ExternalCommandFailed(RuntimeError):
    """
    An external command failed.
    """


def display_error(message, raise_exception=True):
    """
    Display error message, then terminate.

    message          -- text printed after the "Error:" prefix.
    raise_exception  -- when true (the default) raise ExternalCommandFailed
                        so that a caller can catch it; otherwise exit the
                        process with status 1.
    """
    print("Error: %s" % (message,))
    print("")
    if raise_exception:
        # Attach the message to the exception so it is still available to
        # whoever catches, logs or re-raises it.
        raise ExternalCommandFailed(message)
    sys.exit(1)
# Windows compatibility code by Bill Baxter
if os.name == "nt":
    def find_program(name):
        """
        Find the name of the program for Popen.
        Windows is finicky about having the complete file name. Popen
        won't search the %PATH% for you automatically.
        (Adapted from ctypes.find_library)

        Returns the first existing candidate path, or None if nothing on
        %PATH% matches.
        """
        # See MSDN for the REAL search order.
        stem, given_ext = os.path.splitext(name)
        if given_ext:
            suffixes = [given_ext]
        else:
            suffixes = ['.bat', '.exe']
        for folder in os.environ['PATH'].split(os.pathsep):
            for suffix in suffixes:
                candidate = os.path.join(folder, stem + suffix)
                if os.path.exists(candidate):
                    return candidate
        return None
else:
    def find_program(name):
        """
        Find the name of the program for Popen.
        On Unix, popen isn't picky about having absolute paths.
        """
        return name


def shell_quote(s):
    """
    Wrap 's' in quotes for display in a shell-style command string.

    Backslashes are doubled and single quotes are replaced by '"'"';
    the surrounding quote character is '"' on Windows and "'" elsewhere.
    """
    if os.name == "nt":
        q = '"'
    else:
        q = "'"
    escaped = s.replace('\\', '\\\\').replace("'", "'\"'\"'")
    return q + escaped + q
locale_encoding = locale.getpreferredencoding()


def run_svn(args, fail_if_stderr=False, encoding="utf-8"):
    """
    Run an svn command through a pipe and return what it wrote to stdout.

    args            -- sequence of arguments placed after the "svn" program
                       name; unicode values are encoded with 'encoding' (or
                       the locale encoding when 'encoding' is empty) and
                       other non-str values are passed through str().
    fail_if_stderr  -- when true, any non-blank stderr output counts as a
                       failure even if the return code is 0.

    On failure the error is reported through display_error().
    """
    def _as_str(value):
        # Popen is handed plain byte strings only.
        if isinstance(value, unicode):
            return value.encode(encoding or locale_encoding)
        if isinstance(value, str):
            return value
        return str(value)

    svn = find_program("svn")
    argv = [svn] + [_as_str(a) for a in args]
    # Quoted form of the command line, used for display and error text only.
    cmd_string = str(" ".join([shell_quote(part) for part in argv]))
    if runsvn_showcmd:
        print("$ (" + os.getcwd() + ") " + cmd_string)
    if debug_runsvn_timing:
        started = time.time()
    pipe = Popen(argv, executable=svn, stdout=PIPE, stderr=PIPE)
    out, err = pipe.communicate()
    if debug_runsvn_timing:
        finished = time.time()
        print("(" + str(round(finished - started, 4)) + " elapsed)")
    if out and runsvn_showout:
        print(out)
    failed = pipe.returncode != 0
    if not failed and fail_if_stderr:
        failed = bool(err.strip())
    if failed:
        display_error("External program failed (return code %d): %s\n%s"
                      % (pipe.returncode, cmd_string, err))
    return out
def svn_date_to_timestamp(svn_date):
    """
    Parse an SVN date as read from the XML output and
    return the corresponding timestamp.

    svn_date -- text such as "2011-01-01T00:00:00.123456Z"; the fractional
                seconds are optional.
    Returns the number of seconds since the epoch, treating the date as UTC.
    Raises ValueError (from time.strptime) if the text does not match.
    """
    # Strip microseconds and timezone (always UTC, hopefully)
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    # The rstrip() covers dates that have no fractional part, where the
    # trailing "Z" would otherwise survive the split and break strptime().
    date = svn_date.split('.', 1)[0].rstrip('Z')
    time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(time_tuple)
def parse_svn_info_xml(xml_string):
    """
    Parse the XML output from an "svn info" command and extract
    useful information as a dict.

    The dict has the keys 'url', 'revision' (int), 'repos_url',
    'last_changed_rev' (int) and 'kind'. It is empty when the XML contains
    no <entry> element.
    """
    d = {}
    tree = ET.fromstring(xml_string)
    entry = tree.find('.//entry')
    # Compare against None explicitly: the truth value of an Element
    # reflects whether it has children, not whether find() matched.
    if entry is not None:
        d['url'] = entry.find('url').text
        d['revision'] = int(entry.get('revision'))
        d['repos_url'] = tree.find('.//repository/root').text
        d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
        d['kind'] = entry.get('kind')
    return d
def parse_svn_log_xml(xml_string):
    """
    Parse the XML output from an "svn log" command and extract
    useful information as a list of dicts (one per log changeset).

    Each dict has the keys:
      'revision'      -- int
      'author'        -- author text, or None when the revision has none
      'date'          -- value returned by svn_date_to_timestamp()
      'message'       -- stripped commit message with '\\n' line endings
      'changed_paths' -- list of dicts with 'path', 'kind', 'action',
                         'copyfrom_path' and 'copyfrom_revision', sorted
                         by 'path'
    """
    l = []
    tree = ET.fromstring(xml_string)
    for entry in tree.findall('logentry'):
        d = {}
        d['revision'] = int(entry.get('revision'))
        # Some revisions don't have authors, most notably
        # the first revision in a repository.
        author = entry.find('author')
        if author is not None and author.text:
            d['author'] = author.text
        else:
            d['author'] = None
        d['date'] = svn_date_to_timestamp(entry.find('date').text)
        # Some revisions may have empty commit message
        msg = entry.find('msg')
        message = ""
        if msg is not None and msg.text is not None:
            message = msg.text.strip()
        # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
        d['message'] = message.replace('\r\n', '\n').replace('\n\r', '\n'). \
                               replace('\r', '\n')
        paths = d['changed_paths'] = []
        for path in entry.findall('.//path'):
            copyfrom_rev = path.get('copyfrom-rev')
            if copyfrom_rev:
                copyfrom_rev = int(copyfrom_rev)
            paths.append({
                'path': path.text,
                'kind': path.get('kind'),
                'action': path.get('action'),
                'copyfrom_path': path.get('copyfrom-path'),
                'copyfrom_revision': copyfrom_rev,
            })
        # Need to sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
        # can process actions in depth-first order.
        # Sort on the path text explicitly; ordering whole dicts would not
        # be guaranteed to put parents before their children.
        paths.sort(key=lambda changed: changed['path'])
        l.append(d)
    return l
def parse_svn_status_xml(xml_string, base_dir=None):
    """
    Parse the XML output from an "svn status" command and extract
    useful info as a list of dicts (one per status entry).

    Each dict has a 'path' and a 'type' of 'external', 'normal' or
    'unversioned'. When base_dir is given, every entry path must start
    with it and is returned relative to it.
    """
    results = []
    for node in ET.fromstring(xml_string).findall('.//entry'):
        rel_path = node.get('path')
        if base_dir is not None:
            assert rel_path.startswith(base_dir)
            rel_path = rel_path[len(base_dir):].lstrip('/\\')
        status = node.find('wc-status')
        if status.get('item') == 'external':
            entry_type = 'external'
        elif status.get('revision') is not None:
            # Entries that carry a revision are tracked by Subversion.
            entry_type = 'normal'
        else:
            entry_type = 'unversioned'
        results.append({'path': rel_path, 'type': entry_type})
    return results
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Run "svn info" on a URL or working copy and return the parsed result.

    When rev_number is given it is appended to the target as "@<rev>".
    Returns a dict as created by parse_svn_info_xml().
    """
    target = svn_url_or_wc
    if rev_number is not None:
        target = svn_url_or_wc + "@" + str(rev_number)
    # Any stderr output is treated as a failure for "svn info".
    xml_string = run_svn(svn_info_args + [target], fail_if_stderr=True)
    return parse_svn_info_xml(xml_string)
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Check out svn_url into checkout_dir, at rev_number if one is given.

    Returns the stdout text returned by run_svn().
    """
    revision_args = []
    if rev_number is not None:
        revision_args = ['-r', rev_number]
    return run_svn(svn_checkout_args + revision_args + [svn_url, checkout_dir])
def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True):
    """
    Fetch up to 'limit' SVN log entries between the given revisions.

    svn_url_or_wc      -- URL or working-copy path; may already carry an
                          "@rev" peg revision.
    rev_start, rev_end -- revision range; each may be an int, a numeric
                          string or 'HEAD'. No '-r' range is passed when
                          either one is 'HEAD'.
    stop_on_copy       -- pass '--stop-on-copy' to "svn log".
    get_changed_paths  -- pass '-v' so each entry lists its changed paths.

    Returns the list of dicts built by parse_svn_log_xml().
    """
    if stop_on_copy:
        args = ['--stop-on-copy']
    else:
        args = []
    url = str(svn_url_or_wc)
    if rev_start != 'HEAD' and rev_end != 'HEAD':
        args += ['-r', '%s:%s' % (rev_start, rev_end)]
        if not "@" in svn_url_or_wc:
            # Peg the URL at the newer end of the range. Compare numerically:
            # callers mix ints and numeric strings, and comparing those
            # directly orders by type or lexically ("9" > "10").
            try:
                peg_rev = max(int(rev_start), int(rev_end))
            except (TypeError, ValueError):
                # Non-numeric revision keyword: keep the plain comparison.
                peg_rev = max(rev_start, rev_end)
            url += "@" + str(peg_rev)
    if get_changed_paths:
        args += ['-v']
    args += ['--limit', str(limit), url]
    xml_string = run_svn(svn_log_args + args)
    return parse_svn_log_xml(xml_string)
def get_svn_status(svn_wc, flags=None):
    """
    Run "svn status" on the given working copy path and return the parsed
    entries.

    flags -- optional extra arguments placed before the path.
    Returns the list built by parse_svn_status_xml(), with entry paths
    relative to the absolute form of svn_wc.
    """
    # Ensure proper stripping by canonicalizing the path
    svn_wc = os.path.abspath(svn_wc)
    args = list(flags or []) + [svn_wc]
    xml_string = run_svn(svn_status_args + args)
    return parse_svn_status_xml(xml_string, svn_wc)
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True):
    """
    Return the first SVN log entry in the requested revision range.

    Fetches at most one entry through run_svn_log(); reports an error via
    display_error() when the range has no log entries.
    """
    found = run_svn_log(svn_url, rev_start, rev_end, 1,
                        stop_on_copy, get_changed_paths)
    if not found:
        display_error("No SVN log for %s between revisions %s and %s" %
                      (svn_url, rev_start, rev_end))

    return found[0]
def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the first log entry after/at the given revision number in an SVN branch.
    The log is fetched with stop-on-copy from rev_start towards rev_end, so
    for a branch the first entry is the one for the branch creation.

    get_changed_paths -- whether the returned entry should include its
                         'changed_paths' list.

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    # Forward the caller's get_changed_paths instead of forcing it on.
    return get_one_svn_log_entry(svn_url, rev_start, rev_end,
                                 stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the last log entry before/at the given revision number in an SVN branch.
    The log is fetched with stop-on-copy in reverse (from rev_end back
    towards rev_start), so the first entry returned is the latest one in
    the range.

    get_changed_paths -- whether the returned entry should include its
                         'changed_paths' list.
    """
    # Forward the caller's get_changed_paths instead of forcing it on.
    return get_one_svn_log_entry(svn_url, rev_end, rev_start,
                                 stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
# Request duration (seconds) that the chunk length is tuned against.
log_duration_threshold = 10.0
# Smallest number of log entries requested per "svn log" call.
log_min_chunk_length = 10


def iter_svn_log_entries(svn_url, first_rev, last_rev):
    """
    Iterate over SVN log entries between first_rev and last_rev.

    This function features chunked log fetching so that it isn't too nasty
    to the SVN server if many entries are requested.
    """
    next_rev = first_rev
    batch_size = log_min_chunk_length
    span_factor = 1.0
    while last_rev == "HEAD" or next_rev <= last_rev:
        request_started = time.time()
        stop_rev = min(last_rev, next_rev + int(batch_size * span_factor))
        entries = run_svn_log(svn_url, next_rev, stop_rev, batch_size)
        duration = time.time() - request_started
        if not entries:
            if stop_rev == last_rev:
                break
            # Nothing in this window: skip past it and widen the next one.
            next_rev = stop_rev + 1
            span_factor *= 2.0
            continue
        for log_entry in entries:
            yield log_entry
        # Resume right after the last revision handed out.
        next_rev = entries[-1]['revision'] + 1
        # Adapt chunk length based on measured request duration
        if duration < log_duration_threshold:
            batch_size = int(batch_size * 2.0)
        elif duration > log_duration_threshold * 2:
            batch_size = max(log_min_chunk_length, int(batch_size / 2.0))
def commit_from_svn_log_entry(entry, files=None, keep_author=False):
    """
    Given an SVN log entry and an optional sequence of files, do an svn commit.

    entry        -- dict with 'message', 'date' and 'author' keys, as built
                    by parse_svn_log_xml(); 'author' may be None.
    files        -- optional paths to commit; when empty, no paths are
                    passed to "svn ci".
    keep_author  -- when true, commit with "--username <author>"; otherwise
                    the author is appended to the commit message.

    The original commit date is always appended to the message as "Date: ...".
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # This will use the local timezone for displaying commit times
    timestamp = int(entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    # Uncomment this one if you prefer UTC commit times
    #svn_date = "%d 0" % timestamp
    author = entry['author']
    message = entry['message'] + "\nDate: " + svn_date
    options = ["ci", "--force-log", "-m"]
    if keep_author:
        options += [message]
        # Revisions without an author have nothing to pass as --username.
        if author:
            options += ["--username", author]
    else:
        # Only add the "Author:" line when the source revision has one.
        if author:
            message += "\nAuthor: " + author
        options += [message]
    if files:
        options += list(files)
    run_svn(options)
def in_svn(p):
    """
    Check if a given file/folder is being tracked by Subversion.
    Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
    With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of the working-copy.
    Use "svn status" to check the status of the file/folder.

    Returns True only when the first status entry for 'p' has type 'normal'.
    """
    # TODO: Is there a better way to do this?
    status_entries = get_svn_status(p)
    return bool(status_entries) and status_entries[0]['type'] == 'normal'
388 def find_svn_ancestors(source_repos_url
, source_base
, source_offset
, copyfrom_path
, copyfrom_rev
):
390 Given a copy-from path (copyfrom_path), walk the SVN history backwards to inspect
391 the ancestry of that path. Build a collection of copyfrom_path+revision pairs
392 for each of the branch-copies since the initial branch-creation. If we find a
393 copyfrom_path which source_base is a substring match of (e.g. we crawled back to
394 the initial branch-copy from trunk), then return the collection of ancestor paths.
395 Otherwise, copyfrom_path has no ancestry compared to source_base.
397 This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
398 file/folder was renamed in a branch and then that branch was merged back to trunk.
401 * source_repos_url = Full URL to root of repository, e.g. 'file:///path/to/repos'
402 * source_base = e.g. '/trunk'
403 * source_offset = e.g. 'projectA/file1.txt'
404 * copyfrom_path = e.g. '/branches/bug123/projectA/file1.txt'
408 working_path
= copyfrom_path
409 working_base
= copyfrom_path
[:-len(source_offset
)].rstrip('/')
410 working_offset
= source_offset
.strip('/')
411 working_rev
= copyfrom_rev
412 ancestors
= [{'path': [working_base, working_offset], 'revision': working_rev}
]
414 # Get the first "svn log" entry for this path (relative to @rev)
415 #working_path = working_base + "/" + working_offset
417 print ">> find_svn_ancestors: " + source_repos_url
+ working_path
+ "@" + str(working_rev
) + \
418 " (" + working_base
+ " " + working_offset
+ ")"
419 log_entry
= get_first_svn_log_entry(source_repos_url
+ working_path
+ "@" + str(working_rev
), 1, str(working_rev
), True)
422 # Find the action for our working_path in this revision
423 for d
in log_entry
['changed_paths']:
425 if not path
in working_path
:
427 # Check action-type for this file
429 if action
not in 'MARD':
430 display_error("In SVN rev. %d: action '%s' not supported. \
431 Please report a bug!" % (log_entry
['revision'], action
))
433 debug_desc
= ": " + action
+ " " + path
434 if d
['copyfrom_path']:
435 debug_desc
+= " (from " + d
['copyfrom_path'] + "@" + str(d
['copyfrom_revision']) + ")"
439 # If file/folder was replaced, it has no ancestor
442 # If file/folder was deleted, it has no ancestor
445 # If file/folder was added but not a copy, it has no ancestor
446 if not d
['copyfrom_path']:
448 # Else, file/folder was added and is a copy, so check ancestors
449 path_old
= d
['copyfrom_path']
450 working_path
= working_path
.replace(path
, path_old
)
451 if working_base
in working_path
:
452 # If the new and old working_path share the same working_base, just need to update working_offset.
453 working_offset
= working_path
[len(working_base
)+1:]
455 # Else, assume that working_base has changed but working_offset is the same, e.g. a re-branch.
456 # TODO: Is this a safe assumption?!
457 working_base
= working_path
[:-len(working_offset
)].rstrip('/')
458 working_rev
= d
['copyfrom_revision']
460 print ">> find_svn_ancestors: copy-from: " + working_base
+ " " + working_offset
+ "@" + str(working_rev
)
461 ancestors
.append({'path': [working_base, working_offset], 'revision': working_rev}
)
462 # If we found a copy-from case which matches our source_base, we're done
463 if (path_old
== source_base
) or (path_old
.startswith(source_base
+ "/")):
465 # Else, follow the copy and keep on searching
469 def replay_svn_ancestors(ancestors
, source_repos_url
, source_url
, target_url
):
471 Given an array of ancestor info (find_svn_ancestors), replay the history
472 to correctly track renames ("svn copy/move") across branch-merges.
474 For example, consider a sequence of events like this:
475 1. svn copy /trunk /branches/fix1
476 2. (Make some changes on /branches/fix1)
477 3. svn copy /branches/fix1/Proj1 /branches/fix1/Proj2 " Rename folder
478 4. svn copy /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt " Rename file inside renamed folder
479 5. svn co /trunk && svn merge /branches/fix1
480 After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
481 and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
482 to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
483 we'd lose the logical history that Proj2/file2.txt is really a descendant
486 'source_repos_url' is the full URL to the root of the source repository.
487 'ancestors' is the array returned by find_svn_ancestors() with the final
488 destination info appended to it by process_svn_log_entry().
491 # Ignore ancestors[0], which is the original (pre-branch-copy) trunk path
492 # Ignore ancestors[1], which is the original branch-creation commit
493 # Ignore ancestors[n], which is the final commit back to trunk
494 for idx
in range(1, len(ancestors
)-1):
495 ancestor
= ancestors
[idx
]
496 source_base
= ancestor
['path'][0]
497 source_offset
= ancestor
['path'][1]
498 source_path
= source_base
+ "/" + source_offset
499 source_rev
= ancestor
['revision']
500 source_rev_next
= ancestors
[idx
+1]['revision']
501 # Do a "svn log" on the _parent_ directory of source_path, since trying to get log info
502 # for the "old path" on the revision where the copy/move happened will fail.
503 if "/" in source_path
:
504 p_source_path
= source_path
[:source_path
.rindex('/')]
508 print ">> replay_svn_ancestors: ["+str(idx
)+"]" + source_path
+"@"+str(source_rev
) + " ["+p_source_path
+"@"+str(source_rev
)+":"+str(source_rev_next
-1)+"]"
509 it_log_entries
= iter_svn_log_entries(source_repos_url
+p_source_path
, source_rev
, source_rev_next
-1)
510 for log_entry
in it_log_entries
:
511 #print ">> replay_svn_ancestors: log_entry: (" + source_repos_url+source_base + ")"
513 process_svn_log_entry(log_entry
, source_repos_url
, source_repos_url
+source_base
, target_url
)
515 def process_svn_log_entry(log_entry
, source_repos_url
, source_url
, target_url
, source_offset
=""):
517 Process SVN changes from the given log entry.
518 Returns array of all the paths in the working-copy that were changed,
519 i.e. the paths which need to be "svn commit".
521 'log_entry' is the array structure built by parse_svn_log_xml().
522 'source_repos_url' is the full URL to the root of the source repository.
523 'source_url' is the full URL to the source path in the source repository.
524 'target_url' is the full URL to the target path in the target repository.
526 # Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
527 source_base
= source_url
[len(source_repos_url
):]
529 print ">> process_svn_log_entry: " + source_url
+ " (" + source_base
+ ")"
531 svn_rev
= log_entry
['revision']
538 for d
in log_entry
['changed_paths']:
540 msg
= " " + d
['action'] + " " + d
['path']
541 if d
['copyfrom_path']:
542 msg
+= " (from " + d
['copyfrom_path'] + "@" + str(d
['copyfrom_revision']) + ")"
544 # Get the full path for this changed_path
545 # e.g. u'/branches/bug123/projectA/file1.txt'
547 if not path
.startswith(source_base
+ "/"):
548 # Ignore changed files that are not part of this subdir
549 if path
!= source_base
:
550 print ">> process_svn_log_entry: Unrelated path: " + path
+ " (" + source_base
+ ")"
551 unrelated_paths
.append(path
)
553 # Calculate the offset (based on source_base) for this changed_path
554 # e.g. u'projectA/file1.txt'
555 # (path = source_base + "/" + path_offset)
556 path_offset
= path
[len(source_base
):].strip("/")
557 # Get the action for this path
559 if action
not in 'MARD':
560 display_error("In SVN rev. %d: action '%s' not supported. \
561 Please report a bug!" % (svn_rev
, action
))
563 # Try to be efficient and keep track of an explicit list of paths in the
564 # working copy that changed. If we commit from the root of the working copy,
565 # then SVN needs to crawl the entire working copy looking for pending changes.
566 # But, if we gather too many paths to commit, then we wipe commit_paths below
567 # and end-up doing a commit at the root of the working-copy.
568 if len (commit_paths
) < 100:
569 commit_paths
.append(path_offset
)
571 # Special-handling for replace's
574 # If file was "replaced" (deleted then re-added, all in same revision),
575 # then we need to run the "svn rm" first, then change action='A'. This
576 # lets the normal code below handle re-"svn add"'ing the files. This
577 # should replicate the "replace".
578 run_svn(["up", path_offset
])
579 run_svn(["remove", "--force", path_offset
])
583 # Handle all the various action-types
584 # (Handle "add" first, for "svn copy/move" support)
586 # Determine where to export from
587 copyfrom_rev
= svn_rev
590 # Handle cases where this "add" was a copy from another URL in the source repos
591 if d
['copyfrom_revision']:
592 copyfrom_rev
= d
['copyfrom_revision']
593 copyfrom_path
= d
['copyfrom_path']
595 print ">> process_svn_log_entry: copy-to: " + source_base
+ " " + source_offset
+ " " + path_offset
596 if source_base
in copyfrom_path
:
597 # If the copy-from path is inside the current working-copy, no need to check ancestry.
599 copyfrom_path
= copyfrom_path
[len(source_base
):].strip("/")
601 print ">> process_svn_log_entry: Found copy: " + copyfrom_path
+"@"+str(copyfrom_rev
)
604 ancestors
= find_svn_ancestors(source_repos_url
, source_base
, path_offset
,
605 copyfrom_path
, copyfrom_rev
)
607 # Reverse the list, so that we loop in chronological order
609 # Append the current revision
610 ancestors
.append({'path': [source_base, path_offset], 'revision': svn_rev}
)
611 # ancestors[0] is the original (pre-branch-copy) trunk path.
612 # ancestors[1] is the first commit on the new branch.
613 copyfrom_rev
= ancestors
[0]['revision']
614 copyfrom_base
= ancestors
[0]['path'][0]
615 copyfrom_offset
= ancestors
[0]['path'][1]
616 copyfrom_path
= copyfrom_base
+ copyfrom_offset
618 print ">> process_svn_log_entry: FOUND PARENT:"
619 for idx
in range(0,len(ancestors
)):
620 ancestor
= ancestors
[idx
]
621 print " ["+str(idx
)+"] " + ancestor
['path'][0]+" "+ancestor
['path'][1]+"@"+str(ancestor
['revision'])
622 #print ">> process_svn_log_entry: copyfrom_path (before): " + copyfrom_path + " source_base: " + source_base + " p: " + p
623 copyfrom_path
= copyfrom_path
[len(source_base
):].strip("/")
624 #print ">> process_svn_log_entry: copyfrom_path (after): " + copyfrom_path
626 # If this add was a copy-from, do a smart replay of the ancestors' history.
627 # Else just copy/export the files from the source repo and "svn add" them.
630 print ">> process_svn_log_entry: svn_copy: copy-from: " + copyfrom_path
+"@"+str(copyfrom_rev
) + " source_base: "+source_base
+ " len(ancestors): " + str(len(ancestors
))
631 # If we don't have any ancestors, then this is just a straight "svn copy" in the current working-copy.
633 # ...but not if the target is already tracked, because this might run several times for the same path.
634 # TODO: Is there a better way to avoid recursion bugs? Maybe a collection of processed paths?
635 if not in_svn(path_offset
):
636 run_svn(["copy", copyfrom_path
, path_offset
])
638 # Replay any actions which happened to this folder from the ancestor path(s).
639 replay_svn_ancestors(ancestors
, source_repos_url
, source_url
, target_url
)
641 # Create (parent) directory if needed
642 if d
['kind'] == 'dir':
645 p_path
= os
.path
.dirname(path_offset
).strip() or '.'
646 if not os
.path
.exists(p_path
):
648 # Export the entire added tree.
649 run_svn(["export", "--force", "-r", str(copyfrom_rev
),
650 source_repos_url
+ copyfrom_path
+ "@" + str(copyfrom_rev
), path_offset
])
651 # TODO: The "no in_svn" condition here is wrong for replace cases.
652 # Added the in_svn condition here originally since "svn export" is recursive
653 # but "svn log" will have an entry for each indiv file, hence we run into a
654 # cannot-re-add-file-which-is-already-added issue.
655 if (not in_svn(path_offset
)) or (is_replace
):
656 run_svn(["add", "--parents", path_offset
])
657 # TODO: Need to copy SVN properties from source repos
660 # Queue "svn remove" commands, to allow the action == 'A' handling the opportunity
661 # to do smart "svn copy" handling on copy/move/renames.
662 removed_paths
.append(path_offset
)
666 display_error("Internal Error: Handling for action='R' not implemented yet.")
669 modified_paths
.append(path_offset
)
672 display_error("Internal Error: pull_svn_rev: Unhandled 'action' value: '" + action
+ "'")
675 for r
in removed_paths
:
676 # TODO: Is the "svn up" here needed?
678 run_svn(["remove", "--force", r
])
681 for m
in modified_paths
:
682 # TODO: Is the "svn up" here needed?
684 m_url
= source_url
+ "/" + m
685 out
= run_svn(["merge", "-c", str(svn_rev
), "--non-recursive",
686 "--non-interactive", "--accept=theirs-full",
687 m_url
+"@"+str(svn_rev
), m
])
690 print "Unrelated paths: (vs. '" + source_base
+ "')"
691 print "*", unrelated_paths
695 def pull_svn_rev(log_entry
, source_repos_url
, source_url
, target_url
, keep_author
=False):
697 Pull SVN changes from the given log entry.
698 Returns the new SVN revision.
699 If an exception occurs, it will rollback to revision 'svn_rev - 1'.
701 ## Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
702 #source_base = source_url[len(source_repos_url):]
704 svn_rev
= log_entry
['revision']
705 print "\n(Starting source rev #"+str(svn_rev
)+":)"
706 print "r"+str(log_entry
['revision']) + " | " + \
707 log_entry
['author'] + " | " + \
708 str(datetime
.fromtimestamp(int(log_entry
['date'])).isoformat(' '))
709 print log_entry
['message']
710 print "------------------------------------------------------------------------"
711 commit_paths
= process_svn_log_entry(log_entry
, source_repos_url
, source_url
, target_url
)
713 # If we had too many individual paths to commit, wipe the list and just commit at
714 # the root of the working copy.
715 if len (commit_paths
) > 99:
718 # TODO: Use SVN properties to track source URL + rev in the target repo?
719 # This would provide a more reliable resume-support
721 commit_from_svn_log_entry(log_entry
, commit_paths
, keep_author
=keep_author
)
722 except ExternalCommandFailed
:
723 # try to ignore the Properties conflicts on files and dirs
724 # use the copy from original_wc
725 # TODO: Need to re-work this?
726 #has_Conflict = False
727 #for d in log_entry['changed_paths']:
729 # p = p[len(source_base):].strip("/")
730 # if os.path.isfile(p):
731 # if os.path.isfile(p + ".prej"):
732 # has_Conflict = True
733 # shutil.copy(original_wc + os.sep + p, p)
734 # p2=os.sep + p.replace('_', '__').replace('/', '_') \
735 # + ".prej-" + str(svn_rev)
736 # shutil.move(p + ".prej", os.path.dirname(original_wc) + p2)
737 # w="\n### Properties conflicts ignored:"
738 # print "%s %s, in revision: %s\n" % (w, p, svn_rev)
739 # elif os.path.isdir(p):
740 # if os.path.isfile(p + os.sep + "dir_conflicts.prej"):
741 # has_Conflict = True
742 # p2=os.sep + p.replace('_', '__').replace('/', '_') \
743 # + "_dir__conflicts.prej-" + str(svn_rev)
744 # shutil.move(p + os.sep + "dir_conflicts.prej",
745 # os.path.dirname(original_wc) + p2)
746 # w="\n### Properties conflicts ignored:"
747 # print "%s %s, in revision: %s\n" % (w, p, svn_rev)
748 # out = run_svn(["propget", "svn:ignore",
749 # original_wc + os.sep + p])
751 # run_svn(["propset", "svn:ignore", out.strip(), p])
752 # out = run_svn(["propget", "svn:externel",
753 # original_wc + os.sep + p])
755 # run_svn(["propset", "svn:external", out.strip(), p])
758 # commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
760 raise ExternalCommandFailed
761 print "(Finished source rev #"+str(svn_rev
)+")"
765 usage
= "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
766 parser
= OptionParser(usage
)
767 parser
.add_option("-a", "--keep-author", action
="store_true",
768 dest
="keep_author", help="Keep revision Author or not")
769 parser
.add_option("-c", "--continue-from-break", action
="store_true",
770 dest
="cont_from_break",
771 help="Continue from previous break")
772 parser
.add_option("-r", "--svn-rev", type="int", dest
="svn_rev",
773 help="SVN revision to checkout from")
774 (options
, args
) = parser
.parse_args()
776 display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
779 source_url
= args
.pop(0).rstrip("/")
780 target_url
= args
.pop(0).rstrip("/")
781 if options
.keep_author
:
786 # Find the greatest_rev in the source repo
787 svn_info
= get_svn_info(source_url
)
788 greatest_rev
= svn_info
['revision']
792 # if old working copy does not exist, disable continue mode
793 # TODO: Better continue support. Maybe include source repo's rev # in target commit info?
794 if not os
.path
.exists(dup_wc
):
795 options
.cont_from_break
= False
797 if not options
.cont_from_break
:
798 # Warn if Target SVN URL existed
799 cmd
= find_program("svn")
800 pipe
= Popen([cmd
] + ["list"] + [target_url
], executable
=cmd
,
801 stdout
=PIPE
, stderr
=PIPE
)
802 out
, err
= pipe
.communicate()
803 if pipe
.returncode
== 0:
804 print "Target SVN URL: %s existed!" % target_url
807 print "Press 'Enter' to Continue, 'Ctrl + C' to Cancel..."
808 print "(Timeout in 5 seconds)"
809 rfds
, wfds
, efds
= select
.select([sys
.stdin
], [], [], 5)
811 # Get log entry for the SVN revision we will check out
813 # If specify a rev, get log entry just before or at rev
814 svn_start_log
= get_last_svn_log_entry(source_url
, 1, options
.svn_rev
, False)
816 # Otherwise, get log entry of branch creation
817 # TODO: This call is *very* expensive on a repo with lots of revisions.
818 # Even though the call is passing --limit 1, it seems like that limit-filter
819 # is happening after SVN has fetched the full log history.
820 svn_start_log
= get_first_svn_log_entry(source_url
, 1, greatest_rev
, False)
822 # This is the revision we will start from for source_url
823 svn_rev
= svn_start_log
['revision']
825 # Check out a working copy of target_url
826 dup_wc
= os
.path
.abspath(dup_wc
)
827 if os
.path
.exists(dup_wc
):
828 shutil
.rmtree(dup_wc
)
829 svn_checkout(target_url
, dup_wc
)
832 # For the initial commit to the target URL, export all the contents from
833 # the source URL at the start-revision.
834 paths
= run_svn(["list", "-r", str(svn_rev
), source_url
+"@"+str(svn_rev
)])
835 paths
= paths
.strip("\n").split("\n")
840 # Directories have a trailing slash in the "svn list" output
842 path
=path
.rstrip('/')
843 if not os
.path
.exists(path
):
845 run_svn(["export", "--force", "-r" , str(svn_rev
), source_url
+"/"+path
+"@"+str(svn_rev
), path
])
846 run_svn(["add", path
])
847 commit_from_svn_log_entry(svn_start_log
, [], keep_author
)
849 dup_wc
= os
.path
.abspath(dup_wc
)
853 svn_info
= get_svn_info(source_url
)
854 # Get the base URL for the source repos, e.g. u'svn://svn.example.com/svn/repo'
855 source_repos_url
= svn_info
['repos_url']
857 if options
.cont_from_break
:
858 svn_rev
= svn_info
['revision'] - 1
862 # Load SVN log starting from svn_rev + 1
863 it_log_entries
= iter_svn_log_entries(source_url
, svn_rev
+ 1, greatest_rev
)
866 for log_entry
in it_log_entries
:
867 # Replay this revision from source_url into target_url
868 pull_svn_rev(log_entry
, source_repos_url
, source_url
, target_url
, keep_author
)
869 # Update our target working-copy, to ensure everything says it's at the new HEAD revision
870 run_svn(["up", dup_wc
])
872 except KeyboardInterrupt:
873 print "\nStopped by user."
875 run_svn(["revert", "--recursive", "."])
877 print "\nCommand failed with following error:\n"
878 traceback
.print_exc()
880 run_svn(["revert", "--recursive", "."])
886 if __name__
== "__main__":