"""
Replicate (replay) changesets from one SVN repository to another:
* Maintains full logical history (e.g. uses "svn copy" for renames).
* Maintains original commit messages.
* Cannot maintain original commit date, but appends original commit date
  for each commit message: "Date: %d".
* Optionally maintain source author info. (Only supported if accessing
  target SVN repo via file://)
* Optionally run an external shell script before each replayed commit
  to give the ability to dynamically exclude or modify files as part
  of the replay.

License: GPLv2, the same as hgsvn.
Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
(This is a forked and modified version of http://code.google.com/p/svn2svn/)
"""
import calendar
import locale
import os
import re
import sys
import time

from optparse import OptionParser
from subprocess import Popen
from subprocess import PIPE
from datetime import datetime
from operator import itemgetter

try:
    from xml.etree import cElementTree as ET
except ImportError:
    try:
        from xml.etree import ElementTree as ET
    except ImportError:
        try:
            import cElementTree as ET
        except ImportError:
            from elementtree import ElementTree as ET
# Fixed leading arguments for the "svn" sub-commands this script runs.
svn_log_args = ['log', '--xml']
svn_info_args = ['info', '--xml']
svn_checkout_args = ['checkout', '-q']
svn_status_args = ['status', '--xml', '-v', '--ignore-externals']

debug_runsvn_timing = False    # Display how long each "svn" OS command took to run?
# Setup verbosity options
runsvn_showcmd = False         # Display every "svn" OS command we run?
runsvn_showout = False         # Display the stdout results from every "svn" OS command we run?
svnlog_verbose = True          # Display each action + changed-path as we walk the history?
# define exception class
class ExternalCommandFailed(RuntimeError):
    """
    An external command failed.
    """
def display_error(message, raise_exception = True):
    """
    Display error message, then terminate.

    'message' is the text printed after the "Error:" prefix.
    'raise_exception' selects how to terminate. When true (the default),
      ExternalCommandFailed is raised so that a caller can catch it; the
      exception now carries 'message' instead of being raised bare.
    """
    print("Error: %s" % (message,))
    if raise_exception:
        raise ExternalCommandFailed(message)
    # NOTE(review): the non-raising branch is not present in the damaged
    # listing; exiting with status 1 matches the "then terminate" contract
    # above -- confirm against version control.
    sys.exit(1)
# Windows compatibility code by Bill Baxter
# NOTE(review): the platform switch itself is missing from the damaged
# listing; it is reconstructed from the two docstrings (Windows vs. Unix).
if os.name == "nt":
    def find_program(name):
        """
        Find the name of the program for Popen.
        Windows is finicky about having the complete file name. Popen
        won't search the %PATH% for you automatically.
        (Adapted from ctypes.find_library)

        Returns the first existing "<directory>/<base><extension>" found
        while walking %PATH%, or None when nothing matches.
        """
        # See MSDN for the REAL search order.
        base, ext = os.path.splitext(name)
        # An explicit extension is honored; otherwise try the usual ones.
        if ext:
            exts = [ext]
        else:
            exts = ['.bat', '.exe']
        for directory in os.environ['PATH'].split(os.pathsep):
            for e in exts:
                fname = os.path.join(directory, base + e)
                if os.path.exists(fname):
                    return fname
        return None
else:
    def find_program(name):
        """
        Find the name of the program for Popen.
        On Unix, popen isn't picky about having absolute paths.
        """
        return name
def shell_quote(s):
    """
    Quote one command-line argument for display.

    Strings made only of letters, digits, "=" and "-" are returned as-is.
    Anything else is wrapped in quotes, with backslashes doubled and
    embedded single quotes rewritten as '"'"'.
    """
    # NOTE(review): the "def" line, the use of the pattern and the choice of
    # quote character are absent from the damaged listing. The name comes
    # from the call in run_svn(); the single-quote default is implied by the
    # '"'"' escape below; the double quote on Windows is an assumption --
    # confirm against version control.
    p = re.compile('^[A-Za-z0-9=-]+$')
    if p.match(s):
        # No need to wrap "safe" strings in quotes
        return s
    if os.name == "nt":
        q = '"'
    else:
        q = "'"
    return q + s.replace('\\', '\\\\').replace("'", "'\"'\"'") + q
# Fallback encoding for unicode arguments when run_svn() gets no 'encoding'.
locale_encoding = locale.getpreferredencoding()
def run_svn(args, fail_if_stderr=False, ignore_retcode_err=False, encoding="utf-8"):
    """
    Run an "svn" command through a pipe and return what it wrote to stdout.

    'args' is the list of svn arguments, sub-command first. unicode items
      are encoded with 'encoding' (or the locale encoding when 'encoding'
      is empty); any other non-str item is converted with str().
    'fail_if_stderr' treats non-blank stderr output as a failure.
    'ignore_retcode_err' suppresses the failure on a non-zero return code.

    On failure display_error() is called with the return code, the quoted
    command line and the captured stderr.
    """
    def _transform_arg(a):
        if isinstance(a, unicode):
            a = a.encode(encoding or locale_encoding)
        elif not isinstance(a, str):
            a = str(a)
        return a
    # A real list (not a lazy map object) so it can be concatenated below.
    t_args = [_transform_arg(a) for a in args]

    cmd = find_program("svn")
    cmd_string = str(" ".join(map(shell_quote, [cmd] + t_args)))
    if runsvn_showcmd:
        # Default to bright-blue for svn commands that will take action on the working-copy.
        color = "94"
        # For status-only commands (or commands that aren't important to highlight), show in dim-blue.
        status_cmds = ['status', 'st', 'log', 'info', 'list', 'propset', 'update', 'up', 'cleanup', 'revert']
        if args[0] in status_cmds:
            color = "34"
        print("\x1b[34m" + "$" + "\x1b[" + color + "m" + " " + cmd_string + "\x1b[0m")
    if debug_runsvn_timing:
        time1 = time.time()
    pipe = Popen([cmd] + t_args, executable=cmd, stdout=PIPE, stderr=PIPE)
    out, err = pipe.communicate()
    if debug_runsvn_timing:
        time2 = time.time()
        print("(" + str(round(time2 - time1, 4)) + " elapsed)")
    if out and runsvn_showout:
        print(out)
    if (pipe.returncode != 0 and not ignore_retcode_err) or (fail_if_stderr and err.strip()):
        display_error("External program failed (return code %d): %s\n%s"
            % (pipe.returncode, cmd_string, err))
    return out
def svn_date_to_timestamp(svn_date):
    """
    Parse an SVN date as read from the XML output and
    return the corresponding timestamp.

    'svn_date' looks like "2011-12-13T14:15:16.123456Z". The fractional
    seconds are discarded and the value is interpreted as UTC.
    """
    # Strip microseconds and timezone (always UTC, hopefully)
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    # Also drop a bare trailing "Z" so a date without a fractional part parses.
    date = svn_date.split('.', 1)[0].rstrip('Z')
    time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(time_tuple)
def parse_svn_info_xml(xml_string):
    """
    Parse the XML output from an "svn info" command and extract
    useful information as a dict.

    Keys: 'url', 'revision', 'repos_url', 'repos_uuid', 'last_changed_rev'
    and 'kind'. An empty dict is returned when there is no <entry> element.
    """
    d = {}
    tree = ET.fromstring(xml_string)
    entry = tree.find('.//entry')
    # Compare against None: an Element without children is falsy.
    if entry is None:
        return d
    d['url'] = entry.find('url').text
    d['revision'] = int(entry.get('revision'))
    d['repos_url'] = tree.find('.//repository/root').text
    d['repos_uuid'] = tree.find('.//repository/uuid').text
    d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
    d['kind'] = entry.get('kind')
    return d
def parse_svn_log_xml(xml_string):
    """
    Parse the XML output from an "svn log" command and extract
    useful information as a list of dicts (one per log changeset).

    Each dict has 'revision', 'author' (None when absent), 'date' (as
    returned by svn_date_to_timestamp), 'message', 'revprops' (list of
    {'name', 'value'}) and 'changed_paths' (list of {'path', 'kind',
    'action', 'copyfrom_path', 'copyfrom_revision'}, sorted by path).
    """
    l = []
    tree = ET.fromstring(xml_string)
    for entry in tree.findall('logentry'):
        d = {}
        d['revision'] = int(entry.get('revision'))
        # Some revisions don't have authors, most notably
        # the first revision in a repository.
        author = entry.find('author')
        d['author'] = (author.text or None) if author is not None else None
        d['date'] = svn_date_to_timestamp(entry.find('date').text)
        # Some revisions may have empty commit message
        msg = entry.find('msg')
        message = ""
        if msg is not None and msg.text is not None:
            message = msg.text.strip()
        # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
        d['message'] = message.replace('\r\n', '\n').replace('\n\r', '\n'). \
                               replace('\r', '\n')
        revprops = []
        for prop in entry.findall('.//revprops/property'):
            revprops.append({ 'name': prop.get('name'), 'value': prop.text })
        d['revprops'] = revprops
        paths = []
        for path in entry.findall('.//paths/path'):
            copyfrom_rev = path.get('copyfrom-rev')
            if copyfrom_rev:
                copyfrom_rev = int(copyfrom_rev)
            paths.append({
                'path': path.text,
                'kind': path.get('kind'),
                'action': path.get('action'),
                'copyfrom_path': path.get('copyfrom-path'),
                'copyfrom_revision': copyfrom_rev,
            })
        # Need to sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
        # can process actions in depth-first order.
        d['changed_paths'] = sorted(paths, key=itemgetter('path'))
        l.append(d)
    return l
def parse_svn_status_xml(xml_string, base_dir=None):
    """
    Parse the XML output from an "svn status" command and extract
    useful info as a list of dicts (one per status entry).

    'base_dir', when given, must be a prefix of every entry path; it is
      removed (together with any leading path separators) from the
      reported 'path'.
    Each dict has 'path' and 'type', where 'type' is one of 'external',
    'deleted', 'normal' or 'unversioned'.
    """
    l = []
    tree = ET.fromstring(xml_string)
    for entry in tree.findall('.//entry'):
        d = {}
        path = entry.get('path')
        if base_dir is not None:
            assert path.startswith(base_dir)
            path = path[len(base_dir):].lstrip('/\\')
        d['path'] = path
        wc_status = entry.find('wc-status')
        if wc_status.get('item') == 'external':
            d['type'] = 'external'
        elif wc_status.get('item') == 'deleted':
            d['type'] = 'deleted'
        elif wc_status.get('revision') is not None:
            # Any other item that carries a revision is under version control.
            d['type'] = 'normal'
        else:
            d['type'] = 'unversioned'
        l.append(d)
    return l
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Get SVN information for the given URL or working copy,
    with an optionally specified revision number.
    Returns a dict as created by parse_svn_info_xml().
    """
    target = svn_url_or_wc
    if rev_number is not None:
        # Peg the target to the requested revision.
        target = svn_url_or_wc + "@" + str(rev_number)
    xml_string = run_svn(svn_info_args + [target], fail_if_stderr=True)
    return parse_svn_info_xml(xml_string)
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Checkout the given URL at an optional revision number.

    'svn_url' is the URL to check out and 'checkout_dir' the destination
    directory. Returns whatever run_svn() returns for the command.
    """
    args = []
    if rev_number is not None:
        args += ['-r', rev_number]
    args += [svn_url, checkout_dir]
    return run_svn(svn_checkout_args + args)
def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Fetch up to 'limit' SVN log entries between the given revisions.

    'stop_on_copy' adds "--stop-on-copy" and 'get_revprops' adds
    "--with-all-revprops". Unless either bound is 'HEAD', a "-r start:end"
    range is passed and, when the target has no "@" peg yet, it is pegged
    at the larger of the two revisions.
    Returns the list built by parse_svn_log_xml().
    """
    args = []
    if stop_on_copy:
        args += ['--stop-on-copy']
    if get_changed_paths:
        # NOTE(review): this flag line is absent from the damaged listing;
        # "-v" is the svn option that reports changed paths -- confirm.
        args += ['-v']
    if get_revprops:
        args += ['--with-all-revprops']
    url = str(svn_url_or_wc)
    if rev_start != 'HEAD' and rev_end != 'HEAD':
        args += ['-r', '%s:%s' % (rev_start, rev_end)]
        if not "@" in svn_url_or_wc:
            url += "@" + str(max(rev_start, rev_end))
    args += ['--limit', str(limit), url]
    xml_string = run_svn(svn_log_args + args)
    return parse_svn_log_xml(xml_string)
def get_svn_status(svn_wc, flags=None):
    """
    Get SVN status information about the given working copy.

    'flags' is an optional extra argument placed before the path.
    Returns the list built by parse_svn_status_xml(), with paths made
    relative to the absolute form of 'svn_wc'.
    """
    # Ensure proper stripping by canonicalizing the path
    svn_wc = os.path.abspath(svn_wc)
    # NOTE(review): the lines building 'args' are absent from the damaged
    # listing; passing 'flags' as a single argument is an assumption.
    args = []
    if flags:
        args += [flags]
    args += [svn_wc]
    xml_string = run_svn(svn_status_args + args)
    return parse_svn_status_xml(xml_string, svn_wc)
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Get the first SVN log entry in the requested revision range.

    Calls display_error() when the range yields no entries.
    """
    entries = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths, get_revprops)
    if not entries:
        display_error("No SVN log for %s between revisions %s and %s" %
                      (svn_url, rev_start, rev_end))
    return entries[0]
def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the first log entry after/at the given revision number in an SVN branch.
    The search runs from 'rev_start' towards 'rev_end' and stops at copies,
    so the entry returned for a branch is the one for the branch creation
    when that falls inside the range.

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    # Honor the caller's 'get_changed_paths' rather than forcing it to True.
    return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the last log entry before/at the given revision number in an SVN branch.
    The range is walked in reverse ('rev_end' down towards 'rev_start') and
    stops at copies, so the entry returned is the newest one in the range.
    """
    # Honor the caller's 'get_changed_paths' rather than forcing it to True.
    return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
# Tuning for iter_svn_log_entries(): a chunk fetched faster than the threshold
# (seconds) doubles the chunk length, one slower than twice the threshold
# halves it, never dropping below the minimum chunk length.
log_duration_threshold = 10.0
log_min_chunk_length = 10
def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Iterate over SVN log entries between first_rev and last_rev.

    This function features chunked log fetching so that it isn't too nasty
    to the SVN server if many entries are requested.

    'last_rev' may be the string "HEAD". Yields the dicts produced by
    run_svn_log(), in the order they are returned.
    """
    cur_rev = first_rev
    chunk_length = log_min_chunk_length
    chunk_interval_factor = 1.0
    while last_rev == "HEAD" or cur_rev <= last_rev:
        start_t = time.time()
        stop_rev = cur_rev + int(chunk_length * chunk_interval_factor)
        if last_rev != "HEAD":
            # Explicit test instead of relying on min("HEAD", int) ordering.
            stop_rev = min(last_rev, stop_rev)
        entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length, stop_on_copy, get_changed_paths, get_revprops)
        duration = time.time() - start_t
        if not entries:
            if stop_rev == last_rev:
                break
            # Nothing in this window: skip past it and widen the next one.
            cur_rev = stop_rev + 1
            chunk_interval_factor *= 2.0
            continue
        for e in entries:
            yield e
        cur_rev = entries[-1]['revision'] + 1
        # Adapt chunk length based on measured request duration
        if duration < log_duration_threshold:
            chunk_length = int(chunk_length * 2.0)
        elif duration > log_duration_threshold * 2:
            chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
def commit_from_svn_log_entry(entry, files=None, keep_author=False):
    """
    Given an SVN log entry and an optional sequence of files, do an svn commit.

    'entry' supplies 'message', 'date', 'author' and 'revision'.
    'files' limits the commit to those paths when non-empty.
    'keep_author' passes the source author as "--username"; otherwise an
      "Author:" line is appended to the commit message. A missing author
      (None) is tolerated in both modes.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # This will use the local timezone for displaying commit times
    timestamp = int(entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    # Uncomment this one if you prefer UTC commit times
    #svn_date = "%d 0" % timestamp
    author = entry['author']
    message = entry['message'] + "\nDate: " + svn_date
    if keep_author:
        options = ["ci", "--force-log", "-m", message]
        if author:
            options += ["--username", author]
    else:
        options = ["ci", "--force-log", "-m", message + "\nAuthor: " + (author or "")]
    if files:
        options += list(files)
    print("(Committing source rev #" + str(entry['revision']) + "...)")
    # NOTE(review): the call that performs the commit is absent from the
    # damaged listing; run_svn(options) is the evident intent -- confirm.
    run_svn(options)
def in_svn(p):
    """
    Check if a given file/folder is being tracked by Subversion.
    Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
    With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of the working-copy.
    Use "svn status" to check the status of the file/folder.
    """
    # NOTE(review): the "def" line and the selection of the entry to inspect
    # are absent from the damaged listing. The name and parameter come from
    # the call sites and from get_svn_status(p); looking at the first status
    # entry is an assumption -- confirm against version control.
    # TODO: Is there a better way to do this?
    entries = get_svn_status(p)
    if not entries:
        return False
    d = entries[0]
    return (d['type'] == 'normal')
# find_svn_ancestors(): starts from copyfrom_path@copyfrom_rev, asks
# get_first_svn_log_entry() for the log entry of the working path, looks for
# changed paths that occur in that working path, and for an add-with-copy
# appends a {'path', 'revision', 'copyfrom_path', 'copyfrom_rev'} dict to
# ancestors_temp before rewriting working_path/working_rev to the copy
# source. The walk is considered finished when source_base occurs in
# working_path; the collected hops are then turned into 'ancestors'.
# NOTE(review): this listing is damaged. Every statement is split over several
# lines, a stray number is fused to the front of many lines, and some
# statements are absent: 'path', 'action', 'ancestors' and 'max_len' are read
# below but never assigned in the visible text, and no loop header or return
# is visible. Restore the block from version control before changing logic.
# NOTE(review): "path in working_path" and "source_base in working_path" are
# substring tests, not path-prefix tests -- confirm that is intended.
409 def find_svn_ancestors(source_repos_url
, source_url
, path_base
, path_offset
, path_rev
, \
410 copyfrom_path
, copyfrom_rev
, prefix
= ""):
412 Given a final svn-add'd path (path_base+"/"+path_offset) and the origin copy-from
413 path (copyfrom_path), walk the SVN history backwards to inspect the ancestory of
414 that path. Build a collection of copyfrom_path+revision pairs for each of the
415 branch-copies since the initial branch-creation. If we find a copyfrom_path which
416 source_url is a substring match of (e.g. we crawled back to the initial branch-
417 copy from trunk), then return the collection of ancestor paths. Otherwise,
418 copyfrom_path has no ancestory compared to source_url.
420 This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
421 file/folder was renamed in a branch and then that branch was merged back to trunk.
423 'source_repos_url' is the full URL to the root of the source repository,
424 e.g. 'file:///path/to/repo'
425 'source_url' is the full URL to the source path in the source repository.
426 'path_base' is base offset from source_repos_url that we did a "svn log" on.
427 This is usually the same offset off source_url vs. source_repos_url, but
428 in cases where replay_svn_ancestors is calling process_svn_log_entry
429 our path_base might be a branch folder rather than trunk.
431 'path_offset' is the offset from path_base to the file to check ancestry for,
432 e.g. 'projectA/file1.txt'. path = source_repos_url + path_base + path_offset.
433 'path_rev' is the revision ("svn log") that we're processing from the source repo.
434 'copyfrom_path' is copy-from path, e.g. '/branches/bug123/projectA/file1.txt'
435 'copyfrom_rev' is revision this copy-from path was copied at.
439 source_base
= source_url
[len(source_repos_url
):]
440 working_path
= copyfrom_path
441 working_rev
= copyfrom_rev
442 ancestors_temp
= [{'path': path_base+"/"+path_offset, 'revision': path_rev, 'copyfrom_path': copyfrom_path, 'copyfrom_rev': copyfrom_rev}
]
444 # Get the first "svn log" entry for this path (relative to @rev)
445 #working_path = working_base + "/" + working_offset
447 print prefix
+"\x1b[33m" + ">> find_svn_ancestors: " + source_repos_url
+ working_path
+"@"+str(working_rev
) + "\x1b[0m"
448 log_entry
= get_first_svn_log_entry(source_repos_url
+ working_path
+"@"+str(working_rev
), 1, str(working_rev
), True)
452 # Search for any actions on our target path (or parent paths).
453 changed_paths_temp
= []
454 for d
in log_entry
['changed_paths']:
456 if path
in working_path
:
457 changed_paths_temp
.append({'path': path, 'data': d}
)
458 if not changed_paths_temp
:
459 # If no matches, then we've hit the end of the chain and this path has no ancestry back to source_url.
462 # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
463 changed_paths
= sorted(changed_paths_temp
, key
=itemgetter('path'), reverse
=True)
464 # Find the action for our working_path in this revision
465 for v
in changed_paths
:
468 # Check action-type for this file
470 if action
not in 'MARD':
471 display_error("In SVN rev. %d: action '%s' not supported. \
472 Please report a bug!" % (log_entry
['revision'], action
))
474 debug_desc
= "> " + action
+ " " + path
475 if d
['copyfrom_path']:
476 debug_desc
+= " (from " + d
['copyfrom_path']+"@"+str(d
['copyfrom_revision']) + ")"
477 print prefix
+"\x1b[33m" + debug_desc
+ "\x1b[0m"
480 # If file/folder was replaced, it has no ancestor
485 # If file/folder was deleted, it has no ancestor
490 # If file/folder was added but not a copy, it has no ancestor
491 if not d
['copyfrom_path']:
495 # Else, file/folder was added and is a copy, so add an entry to our ancestors list
496 # and keep checking for ancestors
498 print prefix
+"\x1b[33m" + ">> find_svn_ancestors: Found copy-from: " + \
499 path
+ " --> " + d
['copyfrom_path'] + "@" + str(d
['copyfrom_revision']) + "\x1b[0m"
500 ancestors_temp
.append({'path': path
, 'revision': log_entry
['revision'],
501 'copyfrom_path': d
['copyfrom_path'], 'copyfrom_rev': d
['copyfrom_revision']})
502 working_path
= working_path
.replace(d
['path'], d
['copyfrom_path'])
503 working_rev
= d
['copyfrom_revision']
504 # If we found a copy-from case which matches our source_base, we're done
505 if source_base
in working_path
:
508 # Else, follow the copy and keep on searching
512 working_path
= path_base
+"/"+path_offset
513 for idx
in range(0, len(ancestors_temp
)):
514 d
= ancestors_temp
[idx
]
515 working_path
= working_path
.replace(d
['path'], d
['copyfrom_path'])
516 working_rev
= d
['copyfrom_rev']
517 ancestors
.append({'path': working_path, 'revision': working_rev}
)
520 for idx
in range(len(ancestors
)):
522 max_len
= max(max_len
, len(d
['path']+"@"+str(d
['revision'])))
523 print prefix
+"\x1b[93m" + ">> find_svn_ancestors: Found parent ancestors: " + "\x1b[0m"
524 for idx
in range(len(ancestors
)-1):
526 d_next
= ancestors
[idx
+1]
527 print prefix
+"\x1b[33m" + " ["+str(idx
)+"] " + str(d
['path']+"@"+str(d
['revision'])).ljust(max_len
) + \
528 " <-- " + str(d_next
['path']+"@"+str(d_next
['revision'])).ljust(max_len
) + "\x1b[0m"
def get_rev_map(rev_map, src_rev, prefix):
    """
    Find the equivalent rev # in the target repo for the given rev # from the source repo.

    'rev_map' maps source-repo revision numbers to target-repo revision
      numbers.
    'src_rev' is the source revision to translate; the entry with the
      highest key less-than-or-equal-to it is used.
    'prefix' is prepended to the debug output.

    Calls display_error() when no entry at or below 'src_rev' exists.
    """
    # NOTE(review): the guard around the trace line is absent from the damaged
    # listing and no 'debug' switch is visible, so it is looked up defensively.
    show_debug = globals().get('debug', False)
    # Find the highest entry less-than-or-equal-to src_rev
    # (start at src_rev itself and include revision 1).
    for rev in range(src_rev, 0, -1):
        if show_debug:
            print(prefix + "\x1b[32m" + ">> get_rev_map: rev="+str(rev)+" in_rev_map="+str(rev in rev_map) + "\x1b[0m")
        if rev in rev_map:
            return rev_map[rev]
    # Else, we fell off the bottom of the rev_map. Ruh-roh...
    display_error("Internal Error: get_rev_map: Unable to find match rev_map entry for src_rev=" + str(src_rev))
def get_svn_dirlist(svn_path, svn_rev = ""):
    """
    Get a list of all the child contents (recursive) of the given folder path.

    'svn_rev', when given, is passed both as "-r" and as the peg revision.
    Returns the lines of "svn list --recursive" output as a list, or an
    empty list when the command printed (almost) nothing. A non-zero
    return code from svn is ignored.
    """
    args = ["list", "--recursive"]
    path = svn_path
    if svn_rev:
        args += ["-r", str(svn_rev)]
        path += "@"+str(svn_rev)
    args += [path]
    paths = run_svn(args, False, True)
    paths = paths.strip("\n").split("\n") if len(paths)>1 else []
    return paths
# replay_svn_copyfrom(): handles an added path that was copied from
# copyfrom_path@copyfrom_rev. When source_base does not occur in the copy-from
# path it consults find_svn_ancestors() and takes the last ancestor as the
# effective copy-from. If the result is still outside source_base the path is
# exported with "svn export" and added with "svn add --parents"; otherwise the
# source revision is translated with get_rev_map() and "svn copy" is run from
# target_url. For a copy it then compares get_svn_dirlist() of the local path
# with the remote path at svn_rev, calls itself for entries only present
# remotely, runs "svn remove --force" for entries only present locally, and
# finishes with an "svn export --force" of the path at svn_rev.
# NOTE(review): this listing is damaged. Every statement is split over several
# lines, a stray number is fused to the front of many lines, and some
# statements (else branches, guards, the directory creation after the
# os.path.exists() test) are absent. Restore the block from version control
# before changing its logic.
# NOTE(review): "source_base in copyfrom_path" is a substring test, not a
# path-prefix test -- confirm that is intended.
558 def replay_svn_copyfrom(source_repos_url
, source_url
, path_base
, path_offset
, target_url
, svn_rev
, \
559 copyfrom_path
, copyfrom_rev
, rev_map
, is_dir
= False, prefix
= ""):
560 source_base
= source_url
[len(source_repos_url
):]
561 srcfrom_path
= copyfrom_path
562 srcfrom_rev
= copyfrom_rev
564 print prefix
+ "\x1b[32m" + ">> replay_svn_copyfrom: Check copy-from: " + path_base
+" "+path_offset
+ " --> " + copyfrom_path
+"@"+str(copyfrom_rev
) + "\x1b[0m"
565 if source_base
in copyfrom_path
:
566 # The copy-from path is inside source_base, no need to check ancestry.
568 print prefix
+ "\x1b[32;1m" + ">> replay_svn_copyfrom: Check copy-from: Found copy (in source_base): " + copyfrom_path
+"@"+str(copyfrom_rev
) + "\x1b[0m"
570 # Check if the copy-from path has ancestors which chain back to the current path_base
571 ancestors
= find_svn_ancestors(source_repos_url
, source_url
,
572 path_base
, path_offset
, svn_rev
,
573 copyfrom_path
, copyfrom_rev
, prefix
+" ")
575 # The copy-from path has ancestory back to source_url.
576 # ancestors[n] is the original (pre-branch-copy) trunk path.
577 # ancestors[n-1] is the first commit on the new branch.
578 copyfrom_path
= ancestors
[len(ancestors
)-1]['path']
579 copyfrom_rev
= ancestors
[len(ancestors
)-1]['revision']
581 print prefix
+ "\x1b[32;1m" + ">> replay_svn_copyfrom: Check copy-from: Found parent: " + copyfrom_path
+"@"+str(copyfrom_rev
) + "\x1b[0m"
582 if not source_base
in copyfrom_path
:
583 # If this copy-from path has no ancestry back to source_url, then can't do a "svn copy".
584 # Create (parent) directory if needed
585 p_path
= path_offset
if is_dir
else os
.path
.dirname(path_offset
).strip() or '.'
586 if not os
.path
.exists(p_path
):
588 # Export the entire added tree.
589 run_svn(["export", "--force", "-r", str(copyfrom_rev
),
590 source_repos_url
+ copyfrom_path
+ "@" + str(copyfrom_rev
), path_offset
])
591 if not in_svn(path_offset
):
592 run_svn(["add", "--parents", path_offset
])
593 # TODO: Need to copy SVN properties from source repos
595 copyfrom_offset
= copyfrom_path
[len(source_base
):].strip('/')
597 print prefix
+ "\x1b[32m" + ">> replay_svn_copyfrom: svn_copy: Copy-from: " + copyfrom_path
+"@"+str(copyfrom_rev
) + " path_base: "+path_base
+ "\x1b[0m"
598 # Copy this path from the equivalent path+rev in the target repo, to create the
599 # equivalent history.
600 tgt_rev
= get_rev_map(rev_map
, copyfrom_rev
, prefix
+" ")
602 print prefix
+ "\x1b[32m" + ">> replay_svn_copyfrom: get_rev_map: " + str(copyfrom_rev
) + " (source) -> " + str(tgt_rev
) + " (target)" + "\x1b[0m"
603 run_svn(["copy", "-r", tgt_rev
, target_url
+"/"+copyfrom_offset
+"@"+str(tgt_rev
), path_offset
])
604 # Update the content in this fresh copy to match the final target revision.
606 paths_local
= get_svn_dirlist(path_offset
)
607 paths_remote
= get_svn_dirlist(source_url
+"/"+path_offset
, svn_rev
)
608 print prefix
+ "\x1b[32m" + "paths_local: " + str(paths_local
) + "\x1b[0m"
609 print prefix
+ "\x1b[32m" + "paths_remote: " + str(paths_remote
) + "\x1b[0m"
610 # Update files/folders which exist in remote but not local
611 for path
in paths_remote
:
612 if not path
in paths_local
:
613 path_is_dir
= True if path
[-1] == "/" else False
614 replay_svn_copyfrom(source_repos_url
, source_url
, path_base
, path_offset
+"/"+path
,
616 srcfrom_path
+"/"+path
, srcfrom_rev
,
617 rev_map
, path_is_dir
, prefix
+" ")
618 # Remove files/folders which exist in local but not remote
619 for path
in paths_local
:
620 if not path
in paths_remote
:
622 print " D " + path_base
+"/"+path_offset
+"/"+path
623 run_svn(["remove", "--force", path_offset
+"/"+path
])
624 # TODO: Does this handle deleted folders too? Wouldn't want to have a case
625 # where we only delete all files from folder but leave orphaned folder around.
627 run_svn(["export", "--force", "-r", str(svn_rev
),
628 source_repos_url
+path_base
+"/"+path_offset
+"@"+str(svn_rev
), path_offset
])
# replay_svn_ancestors(): for each ancestor from index 1 up to (but not
# including) the last one, takes the parent directory of the ancestor path,
# iterates its log with iter_svn_log_entries() from the ancestor's revision to
# one before the next ancestor's revision, and feeds every log entry to
# process_svn_log_entry() using the ancestor path as the log base. Paths
# queued in removed_paths are then removed with "svn remove --force".
# NOTE(review): this listing is damaged. Every statement is split over several
# lines, a stray number is fused to the front of many lines, and some
# statements are absent: 'd' and 'removed_paths' are read below but never
# assigned in the visible text. Restore the block from version control before
# changing its logic.
630 def replay_svn_ancestors(ancestors
, source_repos_url
, source_url
, source_offset
, \
631 target_url
, rev_map
, prefix
= ""):
633 Given an array of ancestor info (find_svn_ancestors), replay the history
634 to correctly track renames ("svn copy/move") across branch-merges.
636 For example, consider a sequence of events like this:
637 1. svn copy /trunk /branches/fix1
638 2. (Make some changes on /branches/fix1)
639 3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2 " Rename folder
640 4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt " Rename file inside renamed folder
641 5. svn co /trunk && svn merge /branches/fix1
642 After the merge and commit, "svn log -v" with show a delete of /trunk/Proj1
643 and and add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
644 to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
645 we'd lose the logical history that Proj2/file2.txt is really a descendant
648 'ancestors' is the array returned by find_svn_ancestors() with the final
649 destination info appended to it by process_svn_log_entry().
650 'source_repos_url' is the full URL to the root of the source repository.
651 'source_url' is the full URL to the source path in the source repository.
654 source_base
= source_url
[len(source_repos_url
):]
655 for idx
in range(1, len(ancestors
)-1):
657 working_path
= d
['path']
658 working_rev
= d
['revision']
659 working_rev_next
= ancestors
[idx
+1]['revision']
660 # Do a "svn log" on the *parent* directory of working_path, since trying to get log info
661 # for the "old path" on the revision where the copy/move happened will fail.
662 p_working_path
= working_path
[:working_path
.rindex('/')] if '/' in working_path
else ""
664 print prefix
+ "\x1b[35m" + ">> replay_svn_ancestors: ["+str(idx
)+"]" + working_path
+"@"+str(working_rev
) + " ["+p_working_path
+"@"+str(working_rev
)+":"+str(working_rev_next
-1)+"]" + "\x1b[0m"
665 it_log_entries
= iter_svn_log_entries(source_repos_url
+p_working_path
, working_rev
, working_rev_next
-1)
666 for log_entry
in it_log_entries
:
667 #print prefix + ">> replay_svn_ancestors: log_entry: (" + source_repos_url+working_path + ")"
668 #print prefix + log_entry
670 process_svn_log_entry(log_entry
, source_repos_url
, source_url
,
671 source_repos_url
+working_path
, source_offset
,
672 target_url
, rev_map
, removed_paths
, [], prefix
+" ")
673 # Process any deferred removed actions
675 for path_offset
in removed_paths
:
677 print prefix
+ " D " + source_base
+"/"+path_offset
678 run_svn(["remove", "--force", path_offset
])
# process_svn_log_entry(): walks log_entry['changed_paths'], skips paths that
# are neither path_base nor below it, derives path_offset relative to
# path_base, rejects actions outside 'MARD' via display_error(), records up to
# 100 offsets in commit_paths, and then acts per action: a replace runs
# "svn remove --force" first; an add either delegates to replay_svn_copyfrom()
# (when the path has a copy-from revision) or exports and "svn add"s the path;
# deletions are queued in removed_paths; the remaining action runs
# "svn merge -c <rev>" for the path.
# NOTE(review): this listing is damaged. Every statement is split over several
# lines, a stray number is fused to the front of many lines, and some
# statements are absent: 'path', 'action' and 'copyfrom_path' (in the export
# branch) are read below but never assigned in the visible text, and the
# per-action branch headers and the return are not visible. Restore the block
# from version control before changing its logic.
# NOTE(review): 'removed_paths' and 'commit_paths' default to list literals
# and are mutated in place, so calls that omit them would share state; both
# visible callers pass their own lists.
680 def process_svn_log_entry(log_entry
, source_repos_url
, source_url
, source_log_base_url
, source_offset
, \
681 target_url
, rev_map
, removed_paths
= [], commit_paths
= [], prefix
= ""):
683 Process SVN changes from the given log entry.
684 Returns array of all the paths in the working-copy that were changed,
685 i.e. the paths which need to be "svn commit".
687 'log_entry' is the array structure built by parse_svn_log_xml().
688 'source_repos_url' is the full URL to the root of the source repository.
689 'source_url' is the full URL to the source path in the source repository.
690 'source_log_base_url' is the full URL to the source path in the source
691 repository that we ran the "svn log" command based on. Most of the time,
692 this should match source_url, but when called from replay_svn_ancestors()
693 this could be a difference, e.g. source_url is "/trunk" but
694 source_log_base_url is "/branches/fix1".
695 'target_url' is the full URL to the target path in the target repository.
696 'rev_map' is the running mapping-table dictionary for source-repo rev #'s
697 to the equivalent target-repo rev #'s.
698 'removed_paths' is the working list of deferred deletions.
699 'commit_paths' is the working list of specific paths which changes to pass
700 to the final "svn commit".
702 # Get the relative offset of source_url and source_log_base_url based on source_repos_url
703 # e.g. '/branches/bug123'
704 source_base
= source_url
[len(source_repos_url
):]
705 path_base
= source_log_base_url
[len(source_repos_url
):]
707 print prefix
+ "\x1b[32m" + ">> process_svn_log_entry: " + source_log_base_url
+ "@" + str(log_entry
['revision']) + " (path_base:" + path_base
+ " source_offset:" + source_offset
+ ")" + "\x1b[0m"
709 svn_rev
= log_entry
['revision']
711 for d
in log_entry
['changed_paths']:
712 # Get the full path for this changed_path
713 # e.g. '/branches/bug123/projectA/file1.txt'
715 if not path
.startswith(path_base
+ "/"):
716 # Ignore changed files that are not part of this subdir
717 if path
!= path_base
:
719 print prefix
+ "\x1b[90m" + ">> process_svn_log_entry: Unrelated path: " + path
+ " (" + path_base
+ ")" + "\x1b[0m"
721 # Calculate the offset (based on path_base) for this changed_path
722 # e.g. 'projectA/file1.txt'
723 # (path = path_base + "/" + path_offset)
724 # (source_path = source_base + "/" + source_offset + path_offset)
725 path_offset
= path
[len(path_base
):].strip("/")
726 # Get the action for this path
728 if action
not in 'MARD':
729 display_error("In SVN rev. %d: action '%s' not supported. \
730 Please report a bug!" % (svn_rev
, action
))
732 # Try to be efficient and keep track of an explicit list of paths in the
733 # working copy that changed. If we commit from the root of the working copy,
734 # then SVN needs to crawl the entire working copy looking for pending changes.
735 # But, if we gather too many paths to commit, then we wipe commit_paths below
736 # and end-up doing a commit at the root of the working-copy.
737 if len (commit_paths
) < 100:
738 commit_paths
.append(path_offset
)
740 # Special-handling for replace's
743 msg
= " " + action
+ " " + d
['path']
744 if d
['copyfrom_path']:
745 msg
+= " (from " + d
['copyfrom_path'] + "@" + str(d
['copyfrom_revision']) + ")"
747 # If file was "replaced" (deleted then re-added, all in same revision),
748 # then we need to run the "svn rm" first, then change action='A'. This
749 # lets the normal code below handle re-"svn add"'ing the files. This
750 # should replicate the "replace".
751 run_svn(["remove", "--force", source_offset
+path_offset
])
754 # Handle all the various action-types
755 # (Handle "add" first, for "svn copy/move" support)
758 msg
= " " + action
+ " " + d
['path']
759 if d
['copyfrom_path']:
760 msg
+= " (from " + d
['copyfrom_path'] + "@" + str(d
['copyfrom_revision']) + ")"
762 # If we have any queued deletions for this same path, remove those if we're re-adding this path.
763 if (source_offset
+path_offset
) in removed_paths
:
764 removed_paths
.remove(source_offset
+path_offset
)
765 # Determine where to export from.
767 copyfrom_rev
= svn_rev
769 path_is_dir
= True if d
['kind'] == 'dir' else False
770 # Handle cases where this "add" was a copy from another URL in the source repos
771 if d
['copyfrom_revision']:
772 copyfrom_path
= d
['copyfrom_path']
773 copyfrom_rev
= d
['copyfrom_revision']
774 replay_svn_copyfrom(source_repos_url
, source_url
, path_base
, path_offset
,
776 copyfrom_path
, copyfrom_rev
,
777 rev_map
, path_is_dir
, prefix
+" ")
778 # Else just "svn export" the files from the source repo and "svn add" them.
780 # Create (parent) directory if needed
781 p_path
= source_offset
+path_offset
if path_is_dir
else os
.path
.dirname(source_offset
+path_offset
).strip() or '.'
782 if not os
.path
.exists(p_path
):
784 # Export the entire added tree.
785 run_svn(["export", "--force", "-r", str(copyfrom_rev
),
786 source_repos_url
+ copyfrom_path
+ "@" + str(copyfrom_rev
), source_offset
+path_offset
])
787 if not in_svn(source_offset
+path_offset
):
788 run_svn(["add", "--parents", source_offset
+path_offset
])
789 # TODO: Need to copy SVN properties from source repos
792 # Queue "svn remove" commands, to allow the action == 'A' handling the opportunity
793 # to do smart "svn copy" handling on copy/move/renames.
794 if not (source_offset
+path_offset
) in removed_paths
:
795 removed_paths
.append(source_offset
+path_offset
)
799 print prefix
+ " " + action
+ " " + d
['path']
800 # TODO: Is "svn merge -c" correct here? Should this just be an "svn export" plus
802 out
= run_svn(["merge", "-c", str(svn_rev
), "--non-recursive",
803 "--non-interactive", "--accept=theirs-full",
804 source_url
+"/"+path_offset
+"@"+str(svn_rev
), path_offset
])
807 display_error("Internal Error: process_svn_log_entry: Unhandled 'action' value: '" + action
+ "'")
def pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url,
                 target_url, rev_map, keep_author=False):
    """
    Replay one source revision and commit the result.

    log_entry         -- dict describing the source revision. This function
                         reads its 'revision', 'author', 'date' (anything
                         int() accepts; handed to datetime.fromtimestamp)
                         and 'message' keys, and passes the whole dict on to
                         process_svn_log_entry / commit_from_svn_log_entry.
    source_repos_url  -- root URL of the source repository; used to strip the
                         repository prefix from source_url for display.
    source_repos_uuid -- recorded in the 'svn2svn:source_uuid' revprop.
    source_url        -- source URL being replayed; recorded in the
                         'svn2svn:source_url' revprop.
    target_url        -- passed through to process_svn_log_entry.
    rev_map           -- passed through to process_svn_log_entry.
    keep_author       -- passed through to commit_from_svn_log_entry.

    Returns nothing. If the commit fails with ExternalCommandFailed, the
    original exception is re-raised unchanged; no rollback is performed here.

    NOTE(review): the svn commands below use relative paths and no explicit
    target, so this presumably expects the current directory to be the target
    working copy -- confirm against the caller.
    """
    svn_rev = log_entry['revision']

    # Banner for this revision, laid out like an "svn log" entry header.
    print("\n(Starting source rev #" + str(svn_rev) + ":)")
    print("r" + str(svn_rev) + " | " +
          log_entry['author'] + " | " +
          datetime.fromtimestamp(int(log_entry['date'])).isoformat(' '))
    print(log_entry['message'])
    print("------------------------------------------------------------------------")

    # Process all the paths in this log entry. The callee fills in both lists:
    # removals are deferred so that the add ('A') handling gets the first
    # chance to turn a delete+add pair into an "svn copy".
    removed_paths = []
    commit_paths = []
    process_svn_log_entry(log_entry, source_repos_url, source_url, source_url, "",
                          target_url, rev_map, removed_paths, commit_paths)

    # Process any deferred remove actions.
    if removed_paths:
        path_base = source_url[len(source_repos_url):]
        for path_offset in removed_paths:
            if svnlog_verbose:
                print(" D " + path_base + "/" + path_offset)
            run_svn(["remove", "--force", path_offset])

    # If we had too many individual paths to commit, wipe the list and just
    # commit at the root of the working copy.
    if len(commit_paths) > 99:
        commit_paths = []

    try:
        commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
    except ExternalCommandFailed:
        # TODO: Recovering from property conflicts (*.prej files) by falling
        # back to the copy in an original working copy is not implemented.
        # Re-raise the *original* exception so its message and traceback are
        # not replaced by an empty ExternalCommandFailed.
        raise

    # Add source-tracking revprops to the revision that was just committed.
    run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_uuid", source_repos_uuid])
    run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_url", source_url])
    # Pass the revision as a string, like every other command-line argument.
    run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_rev", str(svn_rev)])
    print("(Finished source rev #" + str(svn_rev) + ")")
893 usage
= "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
894 parser
= OptionParser(usage
)
895 parser
.add_option("-a", "--keep-author", action
="store_true",
896 dest
="keep_author", help="Keep revision Author or not")
897 parser
.add_option("-c", "--continue-from-break", action
="store_true",
898 dest
="cont_from_break",
899 help="Continue from previous break")
900 parser
.add_option("-r", "--svn-rev", type="int", dest
="svn_rev",
901 help="SVN revision to checkout from")
902 (options
, args
) = parser
.parse_args()
904 display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
907 source_url
= args
.pop(0).rstrip("/")
908 target_url
= args
.pop(0).rstrip("/")
909 if options
.keep_author
:
914 # Find the greatest_rev in the source repo
915 svn_info
= get_svn_info(source_url
)
916 greatest_rev
= svn_info
['revision']
917 # Get the base URL for the source repos, e.g. 'svn://svn.example.com/svn/repo'
918 source_repos_url
= svn_info
['repos_url']
919 # Get the UUID for the source repos
920 source_repos_uuid
= svn_info
['repos_uuid']
925 # if old working copy does not exist, disable continue mode
926 # TODO: Better continue support. Maybe include source repo's rev # in target commit info?
927 if not os
.path
.exists(dup_wc
):
928 options
.cont_from_break
= False
930 if not options
.cont_from_break
:
931 # Warn if the target SVN URL already exists
932 cmd
= find_program("svn")
933 pipe
= Popen([cmd
] + ["list"] + [target_url
], executable
=cmd
,
934 stdout
=PIPE
, stderr
=PIPE
)
935 out
, err
= pipe
.communicate()
936 if pipe
.returncode
== 0:
937 print "Target SVN URL: %s existed!" % target_url
940 print "Press 'Enter' to Continue, 'Ctrl + C' to Cancel..."
941 print "(Timeout in 5 seconds)"
942 rfds
, wfds
, efds
= select
.select([sys
.stdin
], [], [], 5)
944 # Get log entry for the SVN revision we will check out
946 # If a revision was specified, get the log entry at or just before that revision
947 svn_start_log
= get_last_svn_log_entry(source_url
, 1, options
.svn_rev
, False)
949 # Otherwise, get log entry of branch creation
950 # TODO: This call is *very* expensive on a repo with lots of revisions.
951 # Even though the call is passing --limit 1, it seems like that limit-filter
952 # is happening after SVN has fetched the full log history.
953 svn_start_log
= get_first_svn_log_entry(source_url
, 1, greatest_rev
, False)
955 # This is the revision we will start from for source_url
956 svn_rev
= svn_start_log
['revision']
958 # Check out a working copy of target_url
959 dup_wc
= os
.path
.abspath(dup_wc
)
960 if os
.path
.exists(dup_wc
):
961 shutil
.rmtree(dup_wc
)
962 svn_checkout(target_url
, dup_wc
)
965 # For the initial commit to the target URL, export all the contents from
966 # the source URL at the start-revision.
967 paths
= run_svn(["list", "-r", str(svn_rev
), source_url
+"@"+str(svn_rev
)])
969 paths
= paths
.strip("\n").split("\n")
974 # Directories have a trailing slash in the "svn list" output
976 path
=path
.rstrip('/')
977 if not os
.path
.exists(path
):
979 run_svn(["export", "--force", "-r" , str(svn_rev
), source_url
+"/"+path
+"@"+str(svn_rev
), path
])
980 run_svn(["add", path
])
981 commit_from_svn_log_entry(svn_start_log
, [], keep_author
)
982 # Add source-tracking revprop's
983 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_uuid", source_repos_uuid
])
984 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_url", source_url
])
985 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_rev", svn_rev
])
987 dup_wc
= os
.path
.abspath(dup_wc
)
989 # TODO: Need better resume support. For the time being, expect the caller to explicitly pass in the resume revision.
990 svn_rev
= options
.svn_rev
992 display_error("Invalid arguments\n\nNeed to pass result rev # (-r) when using continue-mode (-c)", False)
994 # Load SVN log starting from svn_rev + 1
995 it_log_entries
= iter_svn_log_entries(source_url
, svn_rev
+ 1, greatest_rev
)
998 for log_entry
in it_log_entries
:
999 # Replay this revision from source_url into target_url
1000 pull_svn_rev(log_entry
, source_repos_url
, source_repos_uuid
, source_url
, target_url
, rev_map
, keep_author
)
1001 # Update our target working-copy, to ensure everything says it's at the new HEAD revision
1002 run_svn(["up", dup_wc
])
1003 # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
1004 dup_info
= get_svn_info(target_url
)
1005 dup_rev
= dup_info
['revision']
1006 svn_rev
= log_entry
['revision']
1007 rev_map
[svn_rev
] = dup_rev
1009 except KeyboardInterrupt:
1010 print "\nStopped by user."
1011 run_svn(["cleanup"])
1012 run_svn(["revert", "--recursive", "."])
1014 print "\nCommand failed with following error:\n"
1015 traceback
.print_exc()
1016 run_svn(["cleanup"])
1017 run_svn(["revert", "--recursive", "."])
1023 if __name__
== "__main__":