"""
Replicate (replay) changesets from one SVN repository to another:
* Maintains full logical history (e.g. uses "svn copy" for renames).
* Maintains original commit messages.
* Optionally maintain source author info. (Only supported if accessing
  target SVN repo via file://)
* Cannot maintain original commit date, but appends original commit date
  for each commit message: "Date: %d".
* Optionally run an external shell script before each replayed commit
  to give the ability to dynamically exclude or modify files as part
  of the replay.

License: GPLv2, the same as hgsvn.
Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
(This is a forked and heavily modified version of http://code.google.com/p/svn2svn/)
"""

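# Example invocation (an illustrative sketch only; the URLs below are hypothetical
# and just show the expected "source_url target_url" argument order):
#   svn2svn.py -a svn://svn.example.com/source/trunk file:///var/svn/target/trunk
#   svn2svn.py -c -r 37 svn://svn.example.com/source/trunk file:///var/svn/target/trunk
# Per the notes above, "-a" (keep author) is only supported when the target repo is
# accessed via file://, and "-c" (continue) expects the resume revision via "-r".
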
import os
import re
import sys
import time
import locale
import shutil
import select
import calendar
import traceback

from optparse import OptionParser, OptionGroup
from subprocess import Popen, PIPE
from datetime import datetime
from operator import itemgetter

try:
    from xml.etree import cElementTree as ET
except ImportError:
    try:
        from xml.etree import ElementTree as ET
    except ImportError:
        try:
            import cElementTree as ET
        except ImportError:
            from elementtree import ElementTree as ET

svn_log_args = ['log', '--xml']
svn_info_args = ['info', '--xml']
svn_checkout_args = ['checkout', '-q']
svn_status_args = ['status', '--xml', '-v', '--ignore-externals']

runsvn_timing = False    # Display how long each "svn" OS command took to run?
# Setup verbosity options
debug = False            # Display debug messages (toggled by the --debug* options)?
runsvn_showcmd = False   # Display every "svn" OS command we run?
runsvn_showout = False   # Display the stdout results from every "svn" OS command we run?
svnlog_verbose = False   # Display each action + changed-path as we walk the history?

# Define exception class
class ExternalCommandFailed(RuntimeError):
    """
    An external command failed.
    """

def display_error(message, raise_exception=True):
    """
    Display error message, then terminate.
    """
    print "Error:", message
    print
    if raise_exception:
        raise ExternalCommandFailed
    else:
        sys.exit(1)

# Windows compatibility code by Bill Baxter
if os.name == "nt":
    def find_program(name):
        """
        Find the name of the program for Popen.
        Windows is finicky about having the complete file name. Popen
        won't search the %PATH% for you automatically.
        (Adapted from ctypes.find_library)
        """
        # See MSDN for the REAL search order.
        base, ext = os.path.splitext(name)
        if ext:
            exts = [ext]
        else:
            exts = ['.bat', '.exe']
        for directory in os.environ['PATH'].split(os.pathsep):
            for e in exts:
                fname = os.path.join(directory, base + e)
                if os.path.exists(fname):
                    return fname
        return None
else:
    def find_program(name):
        """
        Find the name of the program for Popen.
        On Unix, popen isn't picky about having absolute paths.
        """
        return name

def shell_quote(s):
    # Leave simple tokens (e.g. "-r", "--xml", "info") unquoted for readability;
    # quote everything else for the shell.
    p = re.compile('^[A-Za-z0-9=-]+$')
    if p.match(s):
        return s
    if os.name == "nt":
        q = '"'
    else:
        q = "'"
    return q + s.replace('\\', '\\\\').replace("'", "'\"'\"'") + q

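# Illustrative sketch (not from the original source): with the reconstruction above,
# shell_quote("-r") comes back unchanged, while shell_quote("my file.txt") returns
# "'my file.txt'" on POSIX (double-quoted on Windows). The result is only used to
# build the echoed cmd_string below; Popen() itself receives the raw argument list.
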
locale_encoding = locale.getpreferredencoding()

def run_svn(args, fail_if_stderr=False, ignore_retcode_err=False, encoding="utf-8"):
    """
    Run the given "svn" command in a subprocess;
    exit if svn cmd failed
    """
    def _transform_arg(a):
        if isinstance(a, unicode):
            a = a.encode(encoding or locale_encoding)
        elif not isinstance(a, str):
            a = str(a)
        return a
    t_args = map(_transform_arg, args)
    cmd = find_program("svn")
    cmd_string = str(" ".join(map(shell_quote, [cmd] + t_args)))
    if runsvn_showcmd:
        # Default to bright-blue for svn commands that will take action on the working-copy.
        color = "94"
        # For status-only commands (or commands that aren't important to highlight), show in dim-blue.
        status_cmds = ['status', 'st', 'log', 'info', 'list', 'propset', 'update', 'up', 'cleanup', 'revert']
        if args[0] in status_cmds:
            color = "34"
        print "\x1b[34m"+"$"+"\x1b["+color+"m", cmd_string + "\x1b[0m"
    if runsvn_timing:
        time1 = time.time()
    pipe = Popen([cmd] + t_args, executable=cmd, stdout=PIPE, stderr=PIPE)
    out, err = pipe.communicate()
    if runsvn_timing:
        time2 = time.time()
        print "(" + str(round(time2-time1, 4)) + " elapsed)"
    if out and runsvn_showout:
        print out
    if (pipe.returncode != 0 and not ignore_retcode_err) or (fail_if_stderr and err.strip()):
        display_error("External program failed (return code %d): %s\n%s"
            % (pipe.returncode, cmd_string, err))
    return out

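# Typical call pattern used throughout this script (illustrative):
#   xml_string = run_svn(svn_info_args + [some_url], fail_if_stderr=True)
# Quoting via shell_quote() only affects the echoed cmd_string; the actual Popen()
# call gets the argument list directly, so no shell parsing is involved.
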
def svn_date_to_timestamp(svn_date):
    """
    Parse an SVN date as read from the XML output and
    return the corresponding timestamp.
    """
    # Strip microseconds and timezone (always UTC, hopefully)
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    date = svn_date.split('.', 2)[0]
    time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(time_tuple)

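# Example (illustrative): an "svn log --xml" date such as "2011-02-03T04:05:06.789012Z"
# is truncated to "2011-02-03T04:05:06" and converted with calendar.timegm(), i.e.
# interpreted as UTC rather than local time.
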
def parse_svn_info_xml(xml_string):
    """
    Parse the XML output from an "svn info" command and extract
    useful information as a dict.
    """
    d = {}
    tree = ET.fromstring(xml_string)
    entry = tree.find('.//entry')
    d['url'] = entry.find('url').text
    d['revision'] = int(entry.get('revision'))
    d['repos_url'] = tree.find('.//repository/root').text
    d['repos_uuid'] = tree.find('.//repository/uuid').text
    d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
    d['kind'] = entry.get('kind')
    return d

def parse_svn_log_xml(xml_string):
    """
    Parse the XML output from an "svn log" command and extract
    useful information as a list of dicts (one per log changeset).
    """
    l = []
    tree = ET.fromstring(xml_string)
    for entry in tree.findall('logentry'):
        d = {}
        d['revision'] = int(entry.get('revision'))
        # Some revisions don't have authors, most notably
        # the first revision in a repository.
        author = entry.find('author')
        d['author'] = author is not None and author.text or None
        d['date'] = svn_date_to_timestamp(entry.find('date').text)
        # Some revisions may have empty commit message
        message = entry.find('msg')
        message = message is not None and message.text is not None \
                        and message.text.strip() or ""
        # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
        d['message'] = message.replace('\r\n', '\n').replace('\n\r', '\n').replace('\r', '\n')
        revprops = []
        for prop in entry.findall('.//revprops/property'):
            revprops.append({ 'name': prop.get('name'), 'value': prop.text })
        d['revprops'] = revprops
        paths = []
        for path in entry.findall('.//paths/path'):
            copyfrom_rev = path.get('copyfrom-rev')
            if copyfrom_rev:
                copyfrom_rev = int(copyfrom_rev)
            paths.append({
                'path': path.text,
                'kind': path.get('kind'),
                'action': path.get('action'),
                'copyfrom_path': path.get('copyfrom-path'),
                'copyfrom_revision': copyfrom_rev,
                })
        # Need to sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
        # can process actions in depth-first order.
        d['changed_paths'] = sorted(paths, key=itemgetter('path'))
        l.append(d)
    return l

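# Shape of each returned dict (a sketch based on the parsing above; values are examples):
#   {'revision': 123, 'author': 'jrandom', 'date': 1296705906, 'message': 'Fix bug',
#    'revprops': [{'name': 'svn:foo', 'value': 'bar'}],
#    'changed_paths': [{'path': '/trunk/file1.txt', 'kind': 'file', 'action': 'M',
#                       'copyfrom_path': None, 'copyfrom_revision': None}]}
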
def parse_svn_status_xml(xml_string, base_dir=None):
    """
    Parse the XML output from an "svn status" command and extract
    useful info as a list of dicts (one per status entry).
    """
    l = []
    tree = ET.fromstring(xml_string)
    for entry in tree.findall('.//entry'):
        d = {}
        path = entry.get('path')
        if base_dir is not None:
            assert path.startswith(base_dir)
            path = path[len(base_dir):].lstrip('/\\')
        d['path'] = path
        wc_status = entry.find('wc-status')
        d['wc_status'] = {
            'props': wc_status.get('props'),
            'item': wc_status.get('item'),
            'copied': wc_status.get('copied'),
            'revision': wc_status.get('revision'),
            }
        if d['wc_status']['item'] == 'external':
            d['type'] = 'external'
        elif d['wc_status']['item'] == 'deleted':
            d['type'] = 'deleted'
        elif d['wc_status']['item'] == 'added':
            d['type'] = 'added'
        elif (wc_status.get('revision') is not None) or (d['wc_status']['item'] == 'normal'):
            d['type'] = 'normal'
        else:
            d['type'] = 'unversioned'
        l.append(d)
    return l

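# Shape of each returned dict (a sketch based on the parsing above; values are examples):
#   {'path': 'projectA/file1.txt', 'type': 'normal',
#    'wc_status': {'props': 'none', 'item': 'normal', 'copied': None, 'revision': '42'}}
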
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Get SVN information for the given URL or working copy,
    with an optionally specified revision number.
    Returns a dict as created by parse_svn_info_xml().
    """
    if rev_number is not None:
        args = [svn_url_or_wc + "@" + str(rev_number)]
    else:
        args = [svn_url_or_wc]
    xml_string = run_svn(svn_info_args + args, fail_if_stderr=True)
    return parse_svn_info_xml(xml_string)

def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Checkout the given URL at an optional revision number.
    """
    args = []
    if rev_number is not None:
        args += ['-r', rev_number]
    args += [svn_url, checkout_dir]
    return run_svn(svn_checkout_args + args)

def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Fetch up to 'limit' SVN log entries between the given revisions.
    """
    args = []
    if stop_on_copy:
        args += ['--stop-on-copy']
    if get_changed_paths:
        args += ['-v']
    if get_revprops:
        args += ['--with-all-revprops']
    url = str(svn_url_or_wc)
    if rev_start != 'HEAD' and rev_end != 'HEAD':
        args += ['-r', '%s:%s' % (rev_start, rev_end)]
        if not "@" in svn_url_or_wc:
            url += "@" + str(max(rev_start, rev_end))
    args += ['--limit', str(limit), url]
    xml_string = run_svn(svn_log_args + args)
    return parse_svn_log_xml(xml_string)

def get_svn_status(svn_wc, flags=None):
    """
    Get SVN status information about the given working copy.
    """
    # Ensure proper stripping by canonicalizing the path
    svn_wc = os.path.abspath(svn_wc)
    args = [svn_wc]
    if flags:
        args += [flags]
    xml_string = run_svn(svn_status_args + args)
    return parse_svn_status_xml(xml_string, svn_wc)

def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Get the first SVN log entry in the requested revision range.
    """
    entries = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths, get_revprops)
    if not entries:
        display_error("No SVN log for %s between revisions %s and %s" %
                      (svn_url, rev_start, rev_end))
    return entries[0]

def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the first log entry after/at the given revision number in an SVN branch.
    By default the revision number is set to 0, which will give you the log
    entry corresponding to the branch creation.

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True, get_changed_paths=True)

def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the last log entry before/at the given revision number in an SVN branch.
    By default the revision number is set to HEAD, which will give you the log
    entry corresponding to the latest commit in branch.
    """
    return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True, get_changed_paths=True)

log_duration_threshold = 10.0
log_min_chunk_length = 10

def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Iterate over SVN log entries between first_rev and last_rev.

    This function features chunked log fetching so that it isn't too nasty
    to the SVN server if many entries are requested.
    """
    cur_rev = first_rev
    chunk_length = log_min_chunk_length
    chunk_interval_factor = 1.0
    while last_rev == "HEAD" or cur_rev <= last_rev:
        start_t = time.time()
        stop_rev = min(last_rev, cur_rev + int(chunk_length * chunk_interval_factor))
        entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length, stop_on_copy, get_changed_paths, get_revprops)
        duration = time.time() - start_t
        if not entries:
            if stop_rev == last_rev:
                break
            cur_rev = stop_rev + 1
            chunk_interval_factor *= 2.0
            continue
        for e in entries:
            yield e
        cur_rev = e['revision'] + 1
        # Adapt chunk length based on measured request duration
        if duration < log_duration_threshold:
            chunk_length = int(chunk_length * 2.0)
        elif duration > log_duration_threshold * 2:
            chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))

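# Rough behavior sketch: with the defaults above, the first request asks for about 10
# entries; if that round-trip took under 10 seconds the next chunk doubles to 20, and
# if it took over 20 seconds the chunk size is halved (never below log_min_chunk_length).
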
def commit_from_svn_log_entry(entry, files=None, keep_author=False, revprops=[]):
    """
    Given an SVN log entry and an optional sequence of files, do an svn commit.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # This will use the local timezone for displaying commit times
    timestamp = int(entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    # Uncomment this one if you prefer UTC commit times
    #svn_date = "%d 0" % timestamp
    if keep_author:
        options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date, "--username", entry['author']]
    else:
        options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date + "\nAuthor: " + entry['author']]
    if revprops:
        for r in revprops:
            options += ["--with-revprop", r['name']+"="+str(r['value'])]
    if files:
        options += list(files)
    print "(Committing source rev #"+str(entry['revision'])+"...)"
    run_svn(options)

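# Example of the generated commit message (illustrative):
#   <original commit message>
#   Date: 2011-02-03 04:05:06
# with an extra "Author: <original author>" line appended when keep_author is False,
# since in that case the original author can only be recorded in the message body.
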
def in_svn(p, in_repo=False):
    """
    Check if a given file/folder is being tracked by Subversion.
    Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
    With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of the working-copy.
    Use "svn status" to check the status of the file/folder.
    """
    entries = get_svn_status(p)
    if not entries:
        return False
    d = entries[0]
    # If caller requires this path to be in the SVN repo, prevent returning True for locally-added paths.
    if in_repo and (d['type'] == 'added' or d['wc_status']['revision'] is None):
        return False
    return True if (d['type'] == 'normal' or d['type'] == 'added') else False

def find_svn_ancestors(svn_repos_url, base_path, source_path, source_rev, prefix = ""):
    """
    Given a source path, walk the SVN history backwards to inspect the ancestry of
    that path, seeing if it traces back to base_path. Build an array of copyfrom_path
    and copyfrom_revision pairs for each of the "svn copies". If we find a copyfrom_path
    which base_path is a substring match of (e.g. we crawled back to the initial branch-
    copy from trunk), then return the collection of ancestor paths. Otherwise,
    copyfrom_path has no ancestry compared to base_path.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'svn_repos_url' is the full URL to the root of the SVN repository,
      e.g. 'file:///path/to/repo'
    'base_path' is the path in the SVN repo to the target path we're trying to
      trace ancestry back to, e.g. 'trunk'.
    'source_path' is the path in the SVN repo to the source path to start checking
      ancestry at, e.g. 'branches/fix1/projectA/file1.txt'.
      (full_path = svn_repos_url+base_path+"/"+path_offset)
    'source_rev' is the revision to start walking the history of source_path backwards from.
    """
    if debug:
        print prefix+"\x1b[33m" + ">> find_svn_ancestors: Start: ("+svn_repos_url+") source_path: "+source_path+"@"+str(source_rev)+" base_path: "+base_path + "\x1b[0m"
    done = False
    working_path = base_path+"/"+source_path
    working_rev = source_rev
    first_iter_done = False
    ancestors_temp = []
    while not done:
        # Get the first "svn log" entry for this path (relative to @rev)
        if debug:
            print prefix+"\x1b[33m" + ">> find_svn_ancestors: " + svn_repos_url + working_path+"@"+str(working_rev) + "\x1b[0m"
        log_entry = get_first_svn_log_entry(svn_repos_url + working_path+"@"+str(working_rev), 1, str(working_rev), True)
        if not log_entry:
            if debug:
                print prefix+"\x1b[33m" + ">> find_svn_ancestors: Done: no log_entry " + "\x1b[0m"
            break
        # If we found a copy-from case which matches our base_path, we're done.
        # ...but only if we've at least tried to search for the first copy-from path.
        if first_iter_done and working_path.startswith(base_path):
            if debug:
                print prefix+"\x1b[33m" + ">> find_svn_ancestors: Done: Found working_path.startswith(base_path) and first_iter_done=True" + "\x1b[0m"
            break
        first_iter_done = True
        # Search for any actions on our target path (or parent paths).
        changed_paths_temp = []
        for d in log_entry['changed_paths']:
            path = d['path']
            if path in working_path:
                changed_paths_temp.append({'path': path, 'data': d})
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the chain and this path has no ancestry back to base_path.
            if debug:
                print prefix+"\x1b[33m" + ">> find_svn_ancestors: Done: No matching changed_paths" + "\x1b[0m"
            break
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=itemgetter('path'), reverse=True)
        # Find the action for our working_path in this revision
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            action = d['action']
            if action not in 'MARD':
                display_error("In SVN rev. %d: action '%s' not supported. \
                               Please report a bug!" % (log_entry['revision'], action))
            if debug:
                debug_desc = "> " + action + " " + path
                if d['copyfrom_path']:
                    debug_desc += " (from " + d['copyfrom_path']+"@"+str(d['copyfrom_revision']) + ")"
                print prefix+"\x1b[33m" + debug_desc + "\x1b[0m"
            if action == 'R':
                # If file/folder was replaced, it has no ancestor
                ancestors_temp = []
                if debug:
                    print prefix+"\x1b[33m" + ">> find_svn_ancestors: Done: replaced" + "\x1b[0m"
                done = True
                break
            if action == 'D':
                # If file/folder was deleted, it has no ancestor
                ancestors_temp = []
                if debug:
                    print prefix+"\x1b[33m" + ">> find_svn_ancestors: Done: deleted" + "\x1b[0m"
                done = True
                break
            if action == 'A':
                # If file/folder was added but not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    ancestors_temp = []
                    if debug:
                        print prefix+"\x1b[33m" + ">> find_svn_ancestors: Done: no copyfrom_path" + "\x1b[0m"
                    done = True
                    break
                # Else, file/folder was added and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                if debug:
                    print prefix+"\x1b[33m" + ">> find_svn_ancestors: Found copy-from: " + \
                          path + " --> " + d['copyfrom_path']+"@"+str(d['copyfrom_revision']) + "\x1b[0m"
                ancestors_temp.append({'path': path, 'revision': log_entry['revision'],
                    'copyfrom_path': d['copyfrom_path'], 'copyfrom_rev': d['copyfrom_revision']})
                working_path = working_path.replace(d['path'], d['copyfrom_path'])
                working_rev = d['copyfrom_revision']
                # Else, follow the copy and keep on searching
                break
    ancestors = []
    if ancestors_temp:
        ancestors.append({'path': base_path+"/"+source_path, 'revision': source_rev})
        working_path = base_path+"/"+source_path
        for idx in range(len(ancestors_temp)):
            d = ancestors_temp[idx]
            working_path = working_path.replace(d['path'], d['copyfrom_path'])
            working_rev = d['copyfrom_rev']
            ancestors.append({'path': working_path, 'revision': working_rev})
        if debug:
            max_len = 0
            for idx in range(len(ancestors)):
                d = ancestors[idx]
                max_len = max(max_len, len(d['path']+"@"+str(d['revision'])))
            print prefix+"\x1b[93m" + ">> find_svn_ancestors: Found parent ancestors: " + "\x1b[0m"
            for idx in range(len(ancestors)-1):
                d = ancestors[idx]
                d_next = ancestors[idx+1]
                print prefix+"\x1b[33m" + " ["+str(idx)+"] " + str(d['path']+"@"+str(d['revision'])).ljust(max_len) + \
                      " <-- " + str(d_next['path']+"@"+str(d_next['revision'])).ljust(max_len) + "\x1b[0m"
    else:
        if debug:
            print prefix+"\x1b[33m" + ">> find_svn_ancestors: No ancestor-chain found: " + svn_repos_url+base_path+"/"+source_path+"@"+(str(source_rev)) + "\x1b[0m"
    return ancestors

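# Example return value (an illustrative sketch): for a file renamed on a branch that was
# itself copied from trunk, the chain might look like:
#   [{'path': '/trunk/Proj2/file2.txt',         'revision': 60},
#    {'path': '/branches/fix1/Proj2/file2.txt', 'revision': 58},
#    {'path': '/branches/fix1/Proj1/file1.txt', 'revision': 55},
#    {'path': '/trunk/Proj1/file1.txt',         'revision': 50}]
# The first entry is the base_path-relative target; the last entry is the original
# (pre-branch-copy) trunk path, which is how do_svn_add() consumes it below.
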
def get_rev_map(rev_map, src_rev, prefix):
    """
    Find the equivalent rev # in the target repo for the given rev # from the source repo.
    """
    if debug:
        print prefix + "\x1b[32m" + ">> get_rev_map("+str(src_rev)+")" + "\x1b[0m"
    # Find the highest entry less-than-or-equal-to src_rev
    for rev in range(src_rev, 0, -1):
        if debug:
            print prefix + "\x1b[32m" + ">> get_rev_map: rev="+str(rev)+" in_rev_map="+str(rev in rev_map) + "\x1b[0m"
        if rev in rev_map:
            return rev_map[rev]
    # Else, we fell off the bottom of the rev_map. Ruh-roh...
    return None

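# Example (illustrative): with rev_map = {3: 12, 7: 19}, get_rev_map(rev_map, 9, "")
# walks 9, 8, 7 and returns 19, i.e. the target-repo rev for the highest source rev <= 9;
# a src_rev older than every mapped rev yields None.
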
def get_svn_dirlist(svn_path, svn_rev=""):
    """
    Get a list of all the child contents (recursive) of the given folder path.
    """
    args = ["list"]
    path = svn_path
    if svn_rev:
        args += ["-r", str(svn_rev)]
        path += "@"+str(svn_rev)
    args += [path]
    paths = run_svn(args, False, True)
    paths = paths.strip("\n").split("\n") if len(paths)>1 else []
    return paths

def _add_export_todo(export_todo, path_offset):
    # Only add path_offset if it isn't already covered by an existing (parent) entry.
    found = False
    for p in export_todo:
        if path_offset.startswith(p):
            found = True
            break
    if not found:
        export_todo.append(path_offset)
    return export_todo

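# Sketch of the de-dup intent (based on the reconstruction above): if export_todo already
# contains "projectA", then adding "projectA/file1.txt" is a no-op, since exporting the
# parent folder will also refresh that child path.
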
def do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev, \
               parent_copyfrom_path="", parent_copyfrom_rev="", export_todo={}, \
               rev_map={}, is_dir = False, prefix = ""):
    """
    Given the add'd source path, replay the "svn add/copy" commands to correctly
    track renames across copy-from's.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2 " Rename folder
    4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt " Rename file inside renamed folder
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'path_offset' is the offset from source_base to the file to check ancestry for,
      e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'target_url' is the full URL to the target path in the target repository.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s.
    'is_dir' is whether path_offset is a directory (rather than a file).
    """
    source_base = source_url[len(source_repos_url):]
    if debug:
        print prefix + "\x1b[32m" + ">> do_svn_add: " + source_base+"/"+path_offset+"@"+str(source_rev) + \
              (" (parent-copyfrom: "+parent_copyfrom_path+"@"+str(parent_copyfrom_rev)+")" if parent_copyfrom_path else "") + "\x1b[0m"
    # Check if the given path has ancestors which chain back to the current source_base
    found_ancestor = False
    ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset, source_rev, prefix+" ")
    # TODO: Need to handle replace cases. find_svn_ancestors() should have an output param.
    # ancestors[n] is the original (pre-branch-copy) trunk path.
    # ancestors[n-1] is the first commit on the new branch.
    copyfrom_path = ancestors[len(ancestors)-1]['path'] if ancestors else ""
    copyfrom_rev = ancestors[len(ancestors)-1]['revision'] if ancestors else ""
    tgt_rev = None
    if ancestors:
        # The copy-from path has ancestry back to source_url.
        if debug:
            print prefix + "\x1b[32;1m" + ">> do_svn_add: Check copy-from: Found parent: " + copyfrom_path+"@"+str(copyfrom_rev) + "\x1b[0m"
        found_ancestor = True
        # Map the copyfrom_rev (source repo) to the equivalent target repo rev #. This can
        # return None in the case where copyfrom_rev is *before* our source_start_rev.
        tgt_rev = get_rev_map(rev_map, copyfrom_rev, prefix+" ")
        if debug:
            print prefix + "\x1b[32m" + ">> do_svn_add: get_rev_map: " + str(copyfrom_rev) + " (source) -> " + str(tgt_rev) + " (target)" + "\x1b[0m"
    else:
        if debug:
            print prefix + "\x1b[32;1m" + ">> do_svn_add: Check copy-from: No ancestor chain found." + "\x1b[0m"
        found_ancestor = False
    if found_ancestor and tgt_rev:
        # Check if this path_offset in the target WC already has this ancestry, in which
        # case there's no need to run the "svn copy" (again).
        path_in_svn = in_svn(path_offset)
        log_entry = get_last_svn_log_entry(path_offset, 1, 'HEAD', get_changed_paths=False) if in_svn(path_offset, True) else []
        if (not log_entry or (log_entry['revision'] != tgt_rev)):
            copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
            if debug:
                print prefix + "\x1b[32m" + ">> do_svn_add: svn_copy: Copy-from: " + copyfrom_path+"@"+str(copyfrom_rev) + "\x1b[0m"
                print prefix + "in_svn("+path_offset+") = " + str(path_in_svn)
                print prefix + "copyfrom_path: "+copyfrom_path+" parent_copyfrom_path: "+parent_copyfrom_path
                print prefix + "copyfrom_rev: "+str(copyfrom_rev)+" parent_copyfrom_rev: "+str(parent_copyfrom_rev)
            if path_in_svn and \
               ((parent_copyfrom_path and copyfrom_path.startswith(parent_copyfrom_path)) and \
                (parent_copyfrom_rev and copyfrom_rev == parent_copyfrom_rev)):
                # When being called recursively, if this child entry has the same ancestor as the
                # the parent, then no need to try to run another "svn copy".
                if debug:
                    print prefix + "\x1b[32m" + ">> do_svn_add: svn_copy: Same ancestry as parent: " + parent_copyfrom_path+"@"+str(parent_copyfrom_rev) + "\x1b[0m"
                pass
            else:
                # Copy this path from the equivalent path+rev in the target repo, to create the
                # equivalent history.
                if path_in_svn:
                    # If local file is already under version-control, revert it before we run the "svn copy".
                    # TODO: Is this right? Do we need to do a revert and/or hard-delete?
                    if debug:
                        print prefix + "\x1b[32m" + ">> do_svn_add: pre-copy: local path already exists: " + path_offset + "\x1b[0m"
                    run_svn(["revert", path_offset])
                    if os.path.exists(path_offset):
                        # If local file still exists after "svn revert", then hard-delete it.
                        shutil.rmtree(path_offset) if is_dir else os.remove(path_offset)
                run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset])
            if not is_dir:
                # For files, export the final version of this file from the source repo, to make
                # sure we're up-to-date.
                export_todo = _add_export_todo(export_todo, path_offset)
                #run_svn(["export", "--force", "-r", str(copyfrom_rev),
                #         source_repos_url + copyfrom_path+"@"+str(copyfrom_rev), path_offset])
        else:
            print prefix + "\x1b[32m" + ">> do_svn_add: Skipped 'svn copy': " + path_offset + "\x1b[0m"
    else:
        # Else, either this copy-from path has no ancestry back to source_url OR copyfrom_rev comes
        # before our initial source_start_rev (i.e. tgt_rev == None), so can't do a "svn copy".
        # Create (parent) directory if needed.
        # TODO: This is (nearly) a duplicate of code in process_svn_log_entry(). Should this be
        #       split-out to a shared tag?
        p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or '.'
        if not os.path.exists(p_path):
            run_svn(["mkdir", p_path])
        if not in_svn(path_offset):
            if is_dir:
                # Export the final version of all files in this folder.
                export_todo = _add_export_todo(export_todo, path_offset)
            else:
                # Export the final version of this file. We *need* to do this before running
                # the "svn add", even if we end-up re-exporting this file again via export_todo.
                run_svn(["export", "--force", "-r", str(source_rev),
                         source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])
            # If not already under version-control, then "svn add" this file/folder.
            run_svn(["add", "--parents", path_offset])
        # TODO: Need to copy SVN properties from source repos
    if is_dir:
        # For any folders that we process, process any child contents, so that we correctly
        # replay copies/replaces/etc.
        do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url,
                       copyfrom_path, copyfrom_rev, export_todo, rev_map, prefix+" ")

def do_svn_add_dir(source_repos_url, source_url, path_offset, source_rev, target_url, \
                   parent_copyfrom_path, parent_copyfrom_rev, export_todo, rev_map, prefix=""):
    source_base = source_url[len(source_repos_url):]
    # Get the directory contents, to compare between the local WC (target_url) vs. the remote repo (source_url)
    # TODO: paths_local won't include add'd paths because "svn ls" lists the contents of the
    #       associated remote repo folder. (Is this a problem?)
    paths_local = get_svn_dirlist(path_offset)
    paths_remote = get_svn_dirlist(source_url+"/"+path_offset, source_rev)
    if debug:
        print prefix + "\x1b[32m" + ">> do_svn_add_dir: paths_local: " + str(paths_local) + "\x1b[0m"
        print prefix + "\x1b[32m" + ">> do_svn_add_dir: paths_remote: " + str(paths_remote) + "\x1b[0m"
    # TODO: Is this right?
    #  * What if one of the files was replaced?
    #  * What if do_svn_add() ends-up copy'ing a sub-folder, which ends-up
    #    introducing files which were originally missing in paths_local? Should this
    #    be breadth-first (handle immediate contents, and call recursively for folders)?
    #  * What about other cases where the child file/folder history doesn't chain back
    #    to exactly match the chain of this (parent) folder?
    # Update files/folders which exist in remote but not local
    for path in paths_remote:
        path_is_dir = True if path[-1] == "/" else False
        working_path = path_offset+"/"+(path.rstrip('/') if path_is_dir else path)
        do_svn_add(source_repos_url, source_url, working_path, target_url, source_rev,
                   parent_copyfrom_path, parent_copyfrom_rev, export_todo,
                   rev_map, path_is_dir, prefix+" ")
        #if not path in paths_local:
        #    path_is_dir = True if path[-1] == "/" else False
        #    # Update this file, using do_svn_add to handle any copy's
        #    do_svn_add(source_repos_url, source_url, path_offset+"/"+path,
        #               target_url, source_rev, rev_map, path_is_dir, prefix+" ")
        #else:
        #    path_is_dir = True if path[-1] == "/" else False
        #    if not path_is_dir:
        #        run_svn(["export", "--force", "-r", str(source_rev),
        #                 source_repos_url+source_base+"/"+path_offset+"/"+path+"@"+str(source_rev), path_offset+"/"+path])
    # Remove files/folders which exist in local but not remote
    for path in paths_local:
        if not path in paths_remote:
            if svnlog_verbose:
                print " D " + source_base+"/"+path_offset+"/"+path
            run_svn(["remove", "--force", path_offset+"/"+path])
            # TODO: Does this handle deleted folders too? Wouldn't want to have a case
            #       where we only delete all files from folder but leave orphaned folder around.

def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url, \
                          rev_map, removed_paths = [], commit_paths = [], prefix = ""):
    """
    Process SVN changes from the given log entry.
    Returns array of all the paths in the working-copy that were changed,
    i.e. the paths which need to be "svn commit".

    'log_entry' is the array structure built by parse_svn_log_xml().
    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s.
    'removed_paths' is the working list of deferred deletions.
    'commit_paths' is the working list of specific paths which changes to pass
      to the final "svn commit".
    """
    export_todo = []
    # Get the relative offset of source_url based on source_repos_url
    # e.g. '/branches/bug123'
    source_base = source_url[len(source_repos_url):]
    source_rev = log_entry['revision']
    if debug:
        print prefix + "\x1b[32m" + ">> process_svn_log_entry: " + source_url+"@"+str(source_rev) + "\x1b[0m"
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not path.startswith(source_base + "/"):
            # Ignore changed files that are not part of this subdir
            if path != source_base:
                if debug:
                    print prefix + "\x1b[90m" + ">> process_svn_log_entry: Unrelated path: " + path + " (" + source_base + ")" + "\x1b[0m"
            continue
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path
        action = d['action']
        if action not in 'MARD':
            display_error("In SVN rev. %d: action '%s' not supported. \
                           Please report a bug!" % (source_rev, action))

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        # But, if we gather too many paths to commit, then we wipe commit_paths below
        # and end-up doing a commit at the root of the working-copy.
        if len(commit_paths) < 100:
            commit_paths.append(path_offset)

        # Special-handling for replace's
        if action == 'R':
            if svnlog_verbose:
                msg = " " + action + " " + d['path']
                if d['copyfrom_path']:
                    msg += " (from " + d['copyfrom_path']+"@"+str(d['copyfrom_revision']) + ")"
                print prefix + msg
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            if svnlog_verbose:
                msg = " " + action + " " + d['path']
                if d['copyfrom_path']:
                    msg += " (from " + d['copyfrom_path']+"@"+str(d['copyfrom_revision']) + ")"
                print prefix + msg
            # If we have any queued deletions for this same path, remove those if we're re-adding this path.
            if path_offset in removed_paths:
                removed_paths.remove(path_offset)
            # Determine where to export from.
            #copyfrom_path = path
            #copyfrom_rev = source_rev
            path_is_dir = True if d['kind'] == 'dir' else False
            # Handle cases where this "add" was a copy from another URL in the source repos
            if d['copyfrom_revision']:
                copyfrom_path = d['copyfrom_path']
                copyfrom_rev = d['copyfrom_revision']
                do_svn_add(source_repos_url, source_url, path_offset, target_url, source_rev,
                           "", "", export_todo, rev_map, path_is_dir, prefix+" ")
            # Else just "svn export" the files from the source repo and "svn add" them.
            else:
                # Create (parent) directory if needed
                p_path = path_offset if path_is_dir else os.path.dirname(path_offset).strip() or '.'
                if not os.path.exists(p_path):
                    run_svn(["mkdir", p_path])
                if path_is_dir:
                    # Export the entire added tree.
                    export_todo = _add_export_todo(export_todo, path_offset)
                else:
                    # Export the final version of this file. We *need* to do this before running
                    # the "svn add", even if we end-up re-exporting this file again via export_todo.
                    run_svn(["export", "--force", "-r", str(source_rev),
                             source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])
                # TODO: Do we need the in_svn check here?
                #if not in_svn(path_offset):
                run_svn(["add", "--parents", path_offset])
                # TODO: Need to copy SVN properties from source repos

        elif action == 'D':
            # Queue "svn remove" commands, to allow the action == 'A' handling the opportunity
            # to do smart "svn copy" handling on copy/move/renames.
            if not path_offset in removed_paths:
                removed_paths.append(path_offset)

        elif action == 'M':
            if svnlog_verbose:
                print prefix + " " + action + " " + d['path']
            # TODO: Is "svn merge -c" correct here? Should this just be an "svn export" plus
            #       "svn add"?
            out = run_svn(["merge", "-c", str(source_rev), "--non-recursive",
                           "--non-interactive", "--accept=theirs-full",
                           source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

        else:
            display_error("Internal Error: process_svn_log_entry: Unhandled 'action' value: '" + action + "'")

    # Export the final version of all add'd paths from source_url
    if export_todo:
        for path_offset in export_todo:
            run_svn(["export", "--force", "-r", str(source_rev),
                     source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])

    return commit_paths

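# Worked example (illustrative): a changed_paths entry of
#   {'path': '/trunk/projectA/file1.txt', 'action': 'R', 'kind': 'file', ...}
# with source_base == '/trunk' yields path_offset 'projectA/file1.txt'; the 'R' is
# handled as an "svn remove" followed by the normal 'A' (add/copy) logic above.
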
def disp_svn_log_summary(log_entry):
    print "\n(Starting source rev #"+str(log_entry['revision'])+":)"
    print "r"+str(log_entry['revision']) + " | " + \
          log_entry['author'] + " | " + \
          str(datetime.fromtimestamp(int(log_entry['date'])).isoformat(' '))
    print log_entry['message']
    print "------------------------------------------------------------------------"

def pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url, target_url, rev_map, keep_author=False):
    """
    Pull SVN changes from the given log entry.
    Returns the new SVN revision.
    If an exception occurs, it will rollback to revision 'source_rev - 1'.
    """
    disp_svn_log_summary(log_entry)
    source_rev = log_entry['revision']
    # Process all the paths in this log entry
    removed_paths = []
    commit_paths = []
    process_svn_log_entry(log_entry, source_repos_url, source_url, target_url,
                          rev_map, removed_paths, commit_paths)
    # Process any deferred removed actions
    # TODO: Move this code back to process_svn_log_entry(), now that we're not calling process_svn_log_entry() recursively
    if removed_paths:
        path_base = source_url[len(source_repos_url):]
        for path_offset in removed_paths:
            if svnlog_verbose:
                print " D " + path_base+"/"+path_offset
            run_svn(["remove", "--force", path_offset])
    # If we had too many individual paths to commit, wipe the list and just commit at
    # the root of the working copy.
    if len(commit_paths) > 99:
        commit_paths = []
    # Add source-tracking revprop's
    revprops = [{'name':'source_uuid', 'value':source_repos_uuid},
                {'name':'source_url', 'value':source_url},
                {'name':'source_rev', 'value':source_rev}]
    commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author, revprops=revprops)
    print "(Finished source rev #"+str(source_rev)+")"

def main():
    usage = "Usage: %prog [-a] [-c] [-r SVN rev] source_url target_url"
    parser = OptionParser(usage)
    parser.add_option("-r", "--revision", type="int", dest="svn_rev", metavar="REV",
                      help="initial SVN revision to checkout from")
    parser.add_option("-a", "--keep-author", action="store_true", dest="keep_author",
                      help="maintain original Author info from source repo")
    parser.add_option("-c", "--continue", action="store_true", dest="cont_from_break",
                      help="continue from previous break")
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                      help="show 'svn status'-style messages for each action replayed [default]")
    parser.add_option("-q", "--quiet", action="store_false", dest="verbose",
                      help="show only minimal status/progress messages")
    parser.set_defaults(verbose=True)
    group = OptionGroup(parser, "Debug Options")
    group.add_option("--debug", action="store_true", dest="debug_all",
                     help="enable all debugging options")
    group.add_option("--debug-showcmds", action="store_true", dest="debug_showcmds",
                     help="display each SVN command being executed")
    group.add_option("--debug-debugmsgs", action="store_true", dest="debug_debugmsgs",
                     help="display debug messages")
    parser.add_option_group(group)
    (options, args) = parser.parse_args()
    if len(args) != 2:
        display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
                      False)
    source_url = args.pop(0).rstrip("/")
    target_url = args.pop(0).rstrip("/")
    if options.keep_author:
        keep_author = True
    else:
        keep_author = False

    # Find the greatest_rev in the source repo
    svn_info = get_svn_info(source_url)
    greatest_rev = svn_info['revision']
    # Get the base URL for the source repos, e.g. 'svn://svn.example.com/svn/repo'
    source_repos_url = svn_info['repos_url']
    # Get the UUID for the source repos
    source_repos_uuid = svn_info['repos_uuid']

    wc_target = "_wc_target"

    global debug, runsvn_showcmd, svnlog_verbose
    if options.debug_debugmsgs:
        debug = True
    if options.debug_showcmds:
        runsvn_showcmd = True
    if options.debug_all:
        debug = True
        runsvn_showcmd = True
    if options.verbose:
        svnlog_verbose = True

    # if old working copy does not exist, disable continue mode
    # TODO: Better continue support. Maybe include source repo's rev # in target commit info?
    if not os.path.exists(wc_target):
        options.cont_from_break = False

    if not options.cont_from_break:
        # Warn if Target SVN URL existed
        cmd = find_program("svn")
        pipe = Popen([cmd] + ["list"] + [target_url], executable=cmd,
                     stdout=PIPE, stderr=PIPE)
        out, err = pipe.communicate()
        if pipe.returncode == 0:
            print "Target SVN URL: %s existed!" % target_url
            if out:
                print out
            print "Press 'Enter' to Continue, 'Ctrl + C' to Cancel..."
            print "(Timeout in 5 seconds)"
            rfds, wfds, efds = select.select([sys.stdin], [], [], 5)

        # Get log entry for the SVN revision we will check out
        if options.svn_rev:
            # If specify a rev, get log entry just before or at rev
            svn_start_log = get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
        else:
            # Otherwise, get log entry of branch creation
            # TODO: This call is *very* expensive on a repo with lots of revisions.
            #       Even though the call is passing --limit 1, it seems like that limit-filter
            #       is happening after SVN has fetched the full log history.
            svn_start_log = get_first_svn_log_entry(source_url, 1, greatest_rev, False)

        # This is the revision we will start from for source_url
        source_start_rev = svn_start_log['revision']

        # Check out a working copy of target_url
        wc_target = os.path.abspath(wc_target)
        if os.path.exists(wc_target):
            shutil.rmtree(wc_target)
        svn_checkout(target_url, wc_target)
        os.chdir(wc_target)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        paths = run_svn(["list", "-r", str(source_start_rev), source_url+"@"+str(source_start_rev)])
        if len(paths) > 1:
            disp_svn_log_summary(get_one_svn_log_entry(source_url, source_start_rev, source_start_rev))
            print "(Initial import)"
            paths = paths.strip("\n").split("\n")
            # For each top-level file/folder...
            for path in paths:
                if not path:
                    continue
                # Directories have a trailing slash in the "svn list" output
                path_is_dir = True if path[-1] == "/" else False
                if path_is_dir:
                    path = path.rstrip('/')
                    if not os.path.exists(path):
                        os.makedirs(path)
                run_svn(["export", "--force", "-r", str(source_start_rev), source_url+"/"+path+"@"+str(source_start_rev), path])
                run_svn(["add", path])
            revprops = [{'name':'source_uuid', 'value':source_repos_uuid},
                        {'name':'source_url', 'value':source_url},
                        {'name':'source_rev', 'value':source_start_rev}]
            commit_from_svn_log_entry(svn_start_log, [], keep_author=keep_author, revprops=revprops)
            print "(Finished source rev #"+str(source_start_rev)+")"
    else:
        wc_target = os.path.abspath(wc_target)
        os.chdir(wc_target)
        # TODO: Need better resume support. For the time being, expect caller explicitly passes in resume revision.
        source_start_rev = options.svn_rev
        if source_start_rev < 1:
            display_error("Invalid arguments\n\nNeed to pass result rev # (-r) when using continue-mode (-c)", False)

    rev_map = {}
    # Load SVN log starting from source_start_rev + 1
    it_log_entries = iter_svn_log_entries(source_url, source_start_rev + 1, greatest_rev)

    try:
        for log_entry in it_log_entries:
            # Replay this revision from source_url into target_url
            pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url,
                         target_url, rev_map, keep_author)
            # Update our target working-copy, to ensure everything says it's at the new HEAD revision
            run_svn(["up"])
            # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
            dup_info = get_svn_info(target_url)
            dup_rev = dup_info['revision']
            source_rev = log_entry['revision']
            if debug:
                print "\x1b[32m" + ">> main: rev_map.add: source_rev=%s target_rev=%s" % (source_rev, dup_rev) + "\x1b[0m"
            rev_map[source_rev] = dup_rev

    except KeyboardInterrupt:
        print "\nStopped by user."
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
        # TODO: Run "svn status" and pro-actively delete any "?" orphaned entries, to clean-up the WC?
    except:
        print "\nCommand failed with following error:\n"
        traceback.print_exc()
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
        # TODO: Run "svn status" and pro-actively delete any "?" orphaned entries, to clean-up the WC?

if __name__ == "__main__":
    main()