]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn.py
Fix display for action="D"
[svn2svn.git] / svn2svn.py
1 #!/usr/bin/env python
2 """
3 svn2svn.py
4
5 Replicate (replay) changesets from one SVN repository to another:
6 * Maintains full logical history (e.g. uses "svn copy" for renames).
7 * Maintains original commit messages.
8 * Cannot maintain original commit date, but appends original commit date
9 for each commit message: "Date: %d".
10 * Optionally maintain source author info. (Only supported if accessing
11 target SVN repo via file://)
12 * Optionally run an external shell script before each replayed commit
13 to give the ability to dynamically exclude or modify files as part
14 of the replay.
15
16 License: GPLv2, the same as hgsvn.
17 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
18 (This is a forked and modified version of http://code.google.com/p/svn2svn/)
19 """
20
21 import os
22 import sys
23 import time
24 import locale
25 import shutil
26 import select
27 import calendar
28 import traceback
29
30 from optparse import OptionParser
31 from subprocess import Popen, PIPE
32 from datetime import datetime
33 from operator import itemgetter
34
35 try:
36 from xml.etree import cElementTree as ET
37 except ImportError:
38 try:
39 from xml.etree import ElementTree as ET
40 except ImportError:
41 try:
42 import cElementTree as ET
43 except ImportError:
44 from elementtree import ElementTree as ET
45
# Base argument lists for the "svn" subcommands used in this file. Callers
# append their own options/targets to these before passing them to run_svn().
svn_log_args = ['log', '--xml']
svn_info_args = ['info', '--xml']
svn_checkout_args = ['checkout', '-q']
svn_status_args = ['status', '--xml', '-v', '--ignore-externals']

# Setup debug options
debug = False
debug_runsvn_timing = False # Display how long each "svn" OS command took to run?
# Setup verbosity options
runsvn_showcmd = False # Display every "svn" OS command we run?
runsvn_showout = False # Display the stdout results from every "svn" OS command we run?
svnlog_verbose = True # Display each action + changed-path as we walk the history?
58
# Exception type used to signal a failed external command
class ExternalCommandFailed(RuntimeError):
    """Raised when an external command failed."""
64
def display_error(message, raise_exception = True):
    """
    Print an error message, then abort.

    'message' is printed after an "Error:" prefix, followed by a blank line.
    If 'raise_exception' is true, ExternalCommandFailed is raised; otherwise
    the process is terminated through sys.exit(1).
    """
    print("Error: %s" % (message,))
    print("")
    if not raise_exception:
        sys.exit(1)
    raise ExternalCommandFailed
75
# Windows compatibility code by Bill Baxter
if os.name != "nt":
    def find_program(name):
        """
        Return the program name to hand to Popen.
        On Unix, Popen accepts a bare program name, so 'name' is returned as-is.
        """
        return name
else:
    def find_program(name):
        """
        Return the full file name of a program for Popen, or None if it
        cannot be found in any directory listed in %PATH%.
        Windows needs the complete file name; Popen won't search the
        %PATH% for you automatically.
        (Adapted from ctypes.find_library)
        """
        # See MSDN for the REAL search order.
        stem, suffix = os.path.splitext(name)
        # An explicit extension is tried alone; otherwise try the usual ones.
        candidates = suffix and [suffix] or ['.bat', '.exe']
        for folder in os.environ['PATH'].split(os.pathsep):
            for candidate in candidates:
                full_name = os.path.join(folder, stem + candidate)
                if os.path.exists(full_name):
                    return full_name
        return None
104
def shell_quote(s):
    """
    Quote a single argument so it can be shown as part of a shell command line.

    On POSIX the argument is wrapped in single quotes. Nothing is special
    inside single quotes except the single quote itself, so backslashes are
    left untouched and each embedded "'" is emitted as '"'"' (close the
    quote, a double-quoted single quote, reopen the quote).

    On Windows (os.name == "nt") the argument is wrapped in double quotes and
    embedded double quotes are backslash-escaped.

    Returns the quoted string.
    """
    if os.name == "nt":
        return '"' + s.replace('"', '\\"') + '"'
    return "'" + s.replace("'", "'\"'\"'") + "'"
111
# Preferred encoding of the current locale. run_svn() falls back to it for
# encoding unicode arguments when it is called with a false 'encoding' value.
locale_encoding = locale.getpreferredencoding()
113
def run_svn(args, fail_if_stderr=False, encoding="utf-8"):
    """
    Run the "svn" command-line client and return what it wrote to stdout.

    'args' is the list of arguments following "svn"; unicode items are
    encoded with 'encoding' (or the locale encoding if 'encoding' is false)
    and non-string items are converted with str().
    If the command exits with a non-zero return code, or if 'fail_if_stderr'
    is true and anything non-blank was written to stderr, display_error()
    is called with the command line and the captured stderr.
    """
    def _as_str(value):
        if isinstance(value, unicode):
            return value.encode(encoding or locale_encoding)
        if isinstance(value, str):
            return value
        return str(value)

    program = find_program("svn")
    argv = [program] + [_as_str(item) for item in args]
    # Only used for display purposes (verbose output and error messages)
    cmd_string = str(" ".join([shell_quote(part) for part in argv]))
    if runsvn_showcmd:
        print("$ (" + os.getcwd() + ") " + cmd_string)
    if debug_runsvn_timing:
        started = time.time()
    proc = Popen(argv, executable=program, stdout=PIPE, stderr=PIPE)
    out, err = proc.communicate()
    if debug_runsvn_timing:
        finished = time.time()
        print("(" + str(round(finished-started,4)) + " elapsed)")
    if out and runsvn_showout:
        print(out)
    failed = proc.returncode != 0
    if not failed and fail_if_stderr:
        failed = bool(err.strip())
    if failed:
        display_error("External program failed (return code %d): %s\n%s"
                      % (proc.returncode, cmd_string, err))
    return out
144
def svn_date_to_timestamp(svn_date):
    """
    Parse an SVN date as read from the XML output and
    return the corresponding timestamp (seconds since the epoch).

    The date is interpreted as UTC. Both "2011-01-02T03:04:05.123456Z" and
    the fraction-less "2011-01-02T03:04:05Z" forms are accepted.
    Raises ValueError (from time.strptime) if the remaining text does not
    match "%Y-%m-%dT%H:%M:%S".
    """
    # Strip microseconds and timezone (always UTC, hopefully)
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    date = svn_date.split('.', 1)[0]
    # Without a fractional part the split above leaves the "Z" suffix behind
    if date.endswith('Z'):
        date = date[:-1]
    time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(time_tuple)
156
def parse_svn_info_xml(xml_string):
    """
    Parse the XML output from an "svn info" command and extract
    useful information as a dict.

    The returned dict has the keys 'url', 'revision', 'repos_url',
    'last_changed_rev' and 'kind'. It is empty when the XML holds no
    <entry> element, or only an <entry> without child elements.
    """
    d = {}
    tree = ET.fromstring(xml_string)
    entry = tree.find('.//entry')
    # Test explicitly rather than relying on the truth value of an Element,
    # which only reflects whether it has children.
    if entry is not None and len(entry) > 0:
        d['url'] = entry.find('url').text
        d['revision'] = int(entry.get('revision'))
        d['repos_url'] = tree.find('.//repository/root').text
        d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
        d['kind'] = entry.get('kind')
    return d
172
def parse_svn_log_xml(xml_string):
    """
    Parse the XML output from an "svn log" command into a list of dicts,
    one per <logentry>, with the keys 'revision', 'author', 'date',
    'message' and 'changed_paths'.

    'changed_paths' is a list of dicts ('path', 'kind', 'action',
    'copyfrom_path', 'copyfrom_revision') sorted by 'path'.
    """
    log_entries = []
    root = ET.fromstring(xml_string)
    for logentry in root.findall('logentry'):
        revision = int(logentry.get('revision'))
        # Some revisions don't have authors, most notably
        # the first revision in a repository.
        author_node = logentry.find('author')
        if author_node is not None:
            author = author_node.text or None
        else:
            author = None
        date = svn_date_to_timestamp(logentry.find('date').text)
        # Some revisions may have empty commit message
        msg_node = logentry.find('msg')
        if msg_node is not None and msg_node.text is not None:
            message = msg_node.text.strip() or ""
        else:
            message = ""
        # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
        message = message.replace('\r\n', '\n')
        message = message.replace('\n\r', '\n')
        message = message.replace('\r', '\n')
        changed = []
        for node in logentry.findall('.//path'):
            from_rev = node.get('copyfrom-rev')
            if from_rev:
                from_rev = int(from_rev)
            changed.append({
                'path': node.text,
                'kind': node.get('kind'),
                'action': node.get('action'),
                'copyfrom_path': node.get('copyfrom-path'),
                'copyfrom_revision': from_rev,
            })
        # Need to sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
        # can process actions in depth-first order.
        changed.sort(key=itemgetter('path'))
        log_entries.append({
            'revision': revision,
            'author': author,
            'date': date,
            'message': message,
            'changed_paths': changed,
        })
    return log_entries
212
def parse_svn_status_xml(xml_string, base_dir=None):
    """
    Parse the XML output from an "svn status" command into a list of dicts,
    one per <entry>, with the keys 'path' and 'type'.

    If 'base_dir' is given, every entry path must start with it; the prefix
    and any leading slashes/backslashes are removed from the reported path.
    'type' is 'external', 'normal' (the entry has a revision) or 'unversioned'.
    """
    results = []
    for node in ET.fromstring(xml_string).findall('.//entry'):
        entry_path = node.get('path')
        if base_dir is not None:
            assert entry_path.startswith(base_dir)
            entry_path = entry_path[len(base_dir):].lstrip('/\\')
        status = node.find('wc-status')
        if status.get('item') == 'external':
            entry_type = 'external'
        elif status.get('revision') is None:
            entry_type = 'unversioned'
        else:
            entry_type = 'normal'
        results.append({'path': entry_path, 'type': entry_type})
    return results
236
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Run "svn info" on the given URL or working copy and return the dict
    built by parse_svn_info_xml().
    If 'rev_number' is given, it is appended to the target as "@<rev_number>".
    """
    target = svn_url_or_wc
    if rev_number is not None:
        target = target + "@" + str(rev_number)
    return parse_svn_info_xml(run_svn(svn_info_args + [target], fail_if_stderr=True))
249
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Run "svn checkout" of 'svn_url' into 'checkout_dir', passing
    "-r <rev_number>" when a revision number is given.
    Returns the output of run_svn().
    """
    revision_args = []
    if rev_number is not None:
        revision_args = ['-r', rev_number]
    return run_svn(svn_checkout_args + revision_args + [svn_url, checkout_dir])
259
def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True):
    """
    Run "svn log" for up to 'limit' entries between the given revisions and
    return the list built by parse_svn_log_xml().

    The "-r start:end" range is only passed when neither revision is 'HEAD';
    in that case, a target without "@" also gets the larger revision
    appended as "@<rev>". "--stop-on-copy" and "-v" are added according to
    'stop_on_copy' and 'get_changed_paths'.
    """
    log_args = []
    if stop_on_copy:
        log_args.append('--stop-on-copy')
    target = str(svn_url_or_wc)
    if not (rev_start == 'HEAD' or rev_end == 'HEAD'):
        log_args.extend(['-r', '%s:%s' % (rev_start, rev_end)])
        if "@" not in svn_url_or_wc:
            target = target + "@" + str(max(rev_start, rev_end))
    if get_changed_paths:
        log_args.append('-v')
    log_args.extend(['--limit', str(limit), target])
    return parse_svn_log_xml(run_svn(svn_log_args + log_args))
278
def get_svn_status(svn_wc, flags=None):
    """
    Run "svn status" on the given working copy path and return the list
    built by parse_svn_status_xml(), with paths relative to that path.
    'flags', if given, is passed as one extra argument before the path.
    """
    # Ensure proper stripping by canonicalizing the path
    wc_path = os.path.abspath(svn_wc)
    extra_args = []
    if flags:
        extra_args.append(flags)
    status_xml = run_svn(svn_status_args + extra_args + [wc_path])
    return parse_svn_status_xml(status_xml, wc_path)
291
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True):
    """
    Return the first SVN log entry in the requested revision range.
    display_error() is called when the range has no log entries.
    """
    found = run_svn_log(svn_url, rev_start, rev_end, 1,
                        stop_on_copy=stop_on_copy,
                        get_changed_paths=get_changed_paths)
    if not found:
        display_error("No SVN log for %s between revisions %s and %s" %
                      (svn_url, rev_start, rev_end))

    return found[0]
302
def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the first log entry in the range rev_start:rev_end of an SVN branch,
    without following copies ("--stop-on-copy").

    'get_changed_paths' controls whether the changed paths are requested
    ("svn log -v"); it defaults to True.

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    # Honor the caller's choice instead of always requesting changed paths
    return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
314
def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the last log entry in the range rev_start:rev_end of an SVN branch,
    without following copies ("--stop-on-copy"). The range is walked
    backwards (rev_end:rev_start), so the first entry found is the latest one.

    'get_changed_paths' controls whether the changed paths are requested
    ("svn log -v"); it defaults to True.
    """
    # Honor the caller's choice instead of always requesting changed paths
    return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
322
323
# Tuning values for iter_svn_log_entries(): the request duration (compared
# against time.time() differences) around which the chunk length is adapted,
# and the smallest chunk length (number of log entries per request).
log_duration_threshold = 10.0
log_min_chunk_length = 10
326
def iter_svn_log_entries(svn_url, first_rev, last_rev):
    """
    Generate the SVN log entries between first_rev and last_rev, in order.

    Entries are fetched in chunks so that it isn't too nasty to the SVN
    server if many entries are requested. The chunk length is doubled after
    a fast request and halved (down to log_min_chunk_length) after a slow
    one; the revision span is doubled whenever a request comes back empty.
    """
    next_rev = first_rev
    batch_size = log_min_chunk_length
    span_factor = 1.0
    while last_rev == "HEAD" or next_rev <= last_rev:
        started = time.time()
        upper_rev = min(last_rev, next_rev + int(batch_size * span_factor))
        batch = run_svn_log(svn_url, next_rev, upper_rev, batch_size)
        elapsed = time.time() - started
        if batch:
            for log_entry in batch:
                yield log_entry
                next_rev = log_entry['revision'] + 1
            # Adapt chunk length based on measured request duration
            if elapsed < log_duration_threshold:
                batch_size = int(batch_size * 2.0)
            elif elapsed > log_duration_threshold * 2:
                batch_size = max(log_min_chunk_length, int(batch_size / 2.0))
        elif upper_rev == last_rev:
            # Nothing left up to the last requested revision
            break
        else:
            # Empty window: skip past it and widen the next one
            next_rev = upper_rev + 1
            span_factor *= 2.0
356
def commit_from_svn_log_entry(entry, files=None, keep_author=False):
    """
    Given an SVN log entry and an optional sequence of files, do an svn commit.

    The commit message is the entry's message followed by a "Date: ..." line.
    If 'keep_author' is true, the entry's author is passed as "--username";
    otherwise an "Author: ..." line is appended to the message. Entries
    without an author (entry['author'] is None) get neither.
    'files', if given, restricts the commit to those paths.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # This will use the local timezone for displaying commit times
    timestamp = int(entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    # Uncomment this one one if you prefer UTC commit times
    #svn_date = "%d 0" % timestamp
    author = entry['author']
    message = entry['message'] + "\nDate: " + svn_date
    if author and not keep_author:
        message += "\nAuthor: " + author
    options = ["ci", "--force-log", "-m", message]
    if author and keep_author:
        options += ["--username", author]
    if files:
        options += list(files)
    print("(Committing source rev #"+str(entry['revision'])+"...)")
    run_svn(options)
375
def in_svn(p):
    """
    Tell whether a given file/folder is being tracked by Subversion.
    Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
    With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of the working-copy.
    So "svn status" is used instead: the path counts as tracked when the
    first status entry reported for it has type 'normal'.
    """
    # TODO: Is there a better way to do this?
    status_entries = get_svn_status(p)
    if status_entries:
        return (status_entries[0]['type'] == 'normal')
    return False
389
def find_svn_ancestors(source_repos_url, source_base, source_offset, copyfrom_path, copyfrom_rev):
    """
    Given a copy-from path (copyfrom_path), walk the SVN history backwards to inspect
    the ancestry of that path. Build a collection of copyfrom_path+revision pairs
    for each of the branch-copies since the initial branch-creation. If we find a
    copyfrom_path which source_base is a substring match of (e.g. we crawled back to
    the initial branch-copy from trunk), then return the collection of ancestor paths.
    Otherwise, copyfrom_path has no ancestry compared to source_base.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    PARAMETERS:
    * source_repos_url = Full URL to root of repository, e.g. 'file:///path/to/repos'
    * source_base = e.g. '/trunk'
    * source_offset = e.g. 'projectA/file1.txt'
    * copyfrom_path = e.g. '/branches/bug123/projectA/file1.txt'
    * copyfrom_rev = revision at which copyfrom_path is looked up

    RETURNS:
    * A list of {'path': [base, offset], 'revision': rev} dicts, starting with the
      given copyfrom_path/copyfrom_rev and followed by each copy source that was
      walked through, once a copy source equal to or below source_base is found.
    * An empty list if the path turns out to be replaced, deleted, or added
      without copy-from info.
    * None if the loop ends via 'done'.
    """

    done = False
    working_path = copyfrom_path
    # NOTE(review): an empty source_offset would make this slice ([:-0]) empty -- confirm
    # callers never pass an empty offset.
    working_base = copyfrom_path[:-len(source_offset)].rstrip('/')
    working_offset = source_offset.strip('/')
    working_rev = copyfrom_rev
    ancestors = [{'path': [working_base, working_offset], 'revision': working_rev}]
    while not done:
        # Get the first "svn log" entry for this path (relative to @rev)
        #working_path = working_base + "/" + working_offset
        if debug:
            print ">> find_svn_ancestors: " + source_repos_url + working_path + "@" + str(working_rev) + \
                " (" + working_base + " " + working_offset + ")"
        log_entry = get_first_svn_log_entry(source_repos_url + working_path + "@" + str(working_rev), 1, str(working_rev), True)
        # NOTE(review): 'done' is set here but log_entry is still indexed just below;
        # as visible in this file, get_one_svn_log_entry() reports an error rather than
        # returning an empty entry, so this branch looks unreachable -- TODO confirm.
        if not log_entry:
            done = True
        # Find the action for our working_path in this revision
        for d in log_entry['changed_paths']:
            path = d['path']
            # Substring test: skip changed paths that do not occur within working_path
            if not path in working_path:
                continue
            # Check action-type for this file
            action = d['action']
            if action not in 'MARD':
                display_error("In SVN rev. %d: action '%s' not supported. \
Please report a bug!" % (log_entry['revision'], action))
            if debug:
                debug_desc = ": " + action + " " + path
                if d['copyfrom_path']:
                    debug_desc += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
                print debug_desc

            if action == 'R':
                # If file/folder was replaced, it has no ancestor
                return []
            if action == 'D':
                # If file/folder was deleted, it has no ancestor
                return []
            if action == 'A':
                # If file/folder was added but not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    return []
                # Else, file/folder was added and is a copy, so check ancestors
                path_old = d['copyfrom_path']
                working_path = working_path.replace(path, path_old)
                if working_base in working_path:
                    # If the new and old working_path share the same working_base, just need to update working_offset.
                    working_offset = working_path[len(working_base)+1:]
                else:
                    # Else, assume that working_base has changed but working_offset is the same, e.g. a re-branch.
                    # TODO: Is this a safe assumption?!
                    working_base = working_path[:-len(working_offset)].rstrip('/')
                working_rev = d['copyfrom_revision']
                if debug:
                    print ">> find_svn_ancestors: copy-from: " + working_base + " " + working_offset + "@" + str(working_rev)
                ancestors.append({'path': [working_base, working_offset], 'revision': working_rev})
                # If we found a copy-from case which matches our source_base, we're done
                if (path_old == source_base) or (path_old.startswith(source_base + "/")):
                    return ancestors
                # Else, follow the copy and keep on searching
                break
        # NOTE(review): if no changed path matches (or only 'M' actions match), nothing
        # changes before the next iteration of the while-loop -- verify this cannot spin.
    return None
470
def replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url):
    """
    Given an array of ancestor info (find_svn_ancestors), replay the history
    to correctly track renames ("svn copy/move") across branch-merges.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn copy /branches/fix1/Proj1 /branches/fix1/Proj2 " Rename folder
    4. svn copy /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt " Rename file inside renamed folder
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'ancestors' is the array returned by find_svn_ancestors() with the final
    destination info appended to it by process_svn_log_entry().
    'source_repos_url' is the full URL to the root of the source repository.
    'target_url' is handed on to process_svn_log_entry() for every replayed
    log entry.
    """
    # Ignore ancestors[0], which is the original (pre-branch-copy) trunk path
    # Ignore ancestors[1], which is the original branch-creation commit
    # Ignore ancestors[n], which is the final commit back to trunk
    for pos in range(1, len(ancestors)-1):
        current = ancestors[pos]
        source_base, source_offset = current['path'][0], current['path'][1]
        source_path = source_base + "/" + source_offset
        source_rev = current['revision']
        source_rev_next = ancestors[pos+1]['revision']
        # Do a "svn log" on the _parent_ directory of source_path, since trying to get log info
        # for the "old path" on the revision where the copy/move happened will fail.
        last_slash = source_path.rfind('/')
        if last_slash >= 0:
            p_source_path = source_path[:last_slash]
        else:
            p_source_path = ""
        if debug:
            print(">> replay_svn_ancestors: ["+str(pos)+"]" + source_path+"@"+str(source_rev) + " ["+p_source_path+"@"+str(source_rev)+":"+str(source_rev_next-1)+"]")
        # TODO: Hit a problem case with a rename-situation where the "remove" was committed ahead of the "add (copy)".
        #       Do we maybe need to buffer all the remove's until the end of the entire replay session?
        #       Or can we maybe work around this by passing an explicit rev # into "svn copy"?
        for log_entry in iter_svn_log_entries(source_repos_url+p_source_path, source_rev, source_rev_next-1):
            process_svn_log_entry(log_entry, source_repos_url, source_repos_url+source_base, target_url)
519
def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url):
    """
    Process SVN changes from the given log entry, applying them to the
    working copy in the current directory (all "svn" commands are run with
    paths relative to it).
    Returns array of the paths in the working-copy that were changed,
    i.e. the paths which need to be "svn commit". At most 100 paths are
    collected; the caller is expected to treat a full list as "commit everything".

    'log_entry' is the array structure built by parse_svn_log_xml().
    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    """
    # Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
    source_base = source_url[len(source_repos_url):]
    if debug:
        print ">> process_svn_log_entry: " + source_url + " (" + source_base + ")"

    svn_rev = log_entry['revision']
    # Get current target revision, for "svn copy" support
    dup_info = get_svn_info(target_url)
    dup_rev = dup_info['revision']

    removed_paths = []      # deletions, deferred until all other actions ran
    unrelated_paths = []    # changed paths outside of source_base (only reported)
    commit_paths = []       # working-copy paths touched by this revision

    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. u'/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not path.startswith(source_base + "/"):
            # Ignore changed files that are not part of this subdir
            if path != source_base:
                print ">> process_svn_log_entry: Unrelated path: " + path + " (" + source_base + ")"
                unrelated_paths.append(path)
            continue
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. u'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path
        action = d['action']
        if action not in 'MARD':
            display_error("In SVN rev. %d: action '%s' not supported. \
Please report a bug!" % (svn_rev, action))

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        # But, if we gather too many paths to commit, then we wipe commit_paths below
        # and end-up doing a commit at the root of the working-copy.
        if len (commit_paths) < 100:
            commit_paths.append(path_offset)

        # Special-handling for replace's
        is_replace = False
        if action == 'R':
            if svnlog_verbose:
                msg = " " + action + " " + d['path']
                if d['copyfrom_path']:
                    msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
                print msg
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            run_svn(["up", path_offset])
            run_svn(["remove", "--force", path_offset])
            action = 'A'
            is_replace = True

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            if svnlog_verbose:
                msg = " " + action + " " + d['path']
                if d['copyfrom_path']:
                    msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
                print msg
            # Determine where to export from
            copyfrom_rev = svn_rev
            copyfrom_path = path
            svn_copy = False
            # Handle cases where this "add" was a copy from another URL in the source repos
            if d['copyfrom_revision']:
                copyfrom_rev = d['copyfrom_revision']
                copyfrom_path = d['copyfrom_path']
                if debug:
                    print ">> process_svn_log_entry: copy-to: " + source_base + " " + path_offset
                # NOTE(review): this is a substring test, not a prefix test; a copy-from path
                # that merely contains source_base somewhere would also match -- confirm intent.
                if source_base in copyfrom_path:
                    # If the copy-from path is inside the current working-copy, no need to check ancestry.
                    ancestors = []
                    copyfrom_path = copyfrom_path[len(source_base):].strip("/")
                    if debug:
                        print ">> process_svn_log_entry: Found copy: " + copyfrom_path+"@"+str(copyfrom_rev)
                    svn_copy = True
                else:
                    ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset,
                                                   copyfrom_path, copyfrom_rev)
                    if ancestors:
                        # Reverse the list, so that we loop in chronological order
                        ancestors.reverse()
                        # Append the current revision
                        ancestors.append({'path': [source_base, path_offset], 'revision': svn_rev})
                        # ancestors[0] is the original (pre-branch-copy) trunk path.
                        # ancestors[1] is the first commit on the new branch.
                        copyfrom_rev = ancestors[0]['revision']
                        copyfrom_base = ancestors[0]['path'][0]
                        copyfrom_offset = ancestors[0]['path'][1]
                        # NOTE(review): base and offset are joined without a "/" here; the
                        # slice+strip below appears to rely on copyfrom_base having the same
                        # length as source_base -- TODO confirm.
                        copyfrom_path = copyfrom_base + copyfrom_offset
                        if debug:
                            print ">> process_svn_log_entry: FOUND PARENT:"
                            for idx in range(0,len(ancestors)):
                                ancestor = ancestors[idx]
                                print " ["+str(idx)+"] " + ancestor['path'][0]+" "+ancestor['path'][1]+"@"+str(ancestor['revision'])
                        #print ">> process_svn_log_entry: copyfrom_path (before): " + copyfrom_path + " source_base: " + source_base + " p: " + p
                        copyfrom_path = copyfrom_path[len(source_base):].strip("/")
                        #print ">> process_svn_log_entry: copyfrom_path (after): " + copyfrom_path
                        svn_copy = True
            # If this add was a copy-from, do a smart replay of the ancestors' history.
            if svn_copy:
                if debug:
                    print ">> process_svn_log_entry: svn_copy: copy-from: " + copyfrom_path+"@"+str(copyfrom_rev) + " source_base: "+source_base + " len(ancestors): " + str(len(ancestors))
                # If we don't have any ancestors, then this is just a straight "svn copy" in the current working-copy.
                if not ancestors:
                    # ...but not if the target is already tracked, because this might run several times for the same path.
                    # TODO: Is there a better way to avoid recursion bugs? Maybe a collection of processed paths?
                    if not in_svn(path_offset):
                        if os.path.exists(copyfrom_path):
                            # If the copyfrom_path exists in the working-copy, do a local copy
                            run_svn(["copy", copyfrom_path, path_offset])
                        else:
                            # Otherwise copy from the target repository at its current revision
                            run_svn(["copy", "-r", dup_rev, target_url+"/"+copyfrom_path+"@"+str(dup_rev), path_offset])
                else:
                    if d['kind'] == 'dir':
                        # Replay any actions which happened to this folder from the ancestor path(s).
                        replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url)
                    else:
                        # Just do a straight "svn copy" for files. There isn't any kind of "dependent"
                        # history we might need to replay like for folders.
                        # TODO: Is this logic really correct? Doing a WC vs URL "svn copy" based on existence
                        # of *source* location seems a bit kludgy. Should there be a running list of
                        # renames during replay_svn_ancestors >> process_svn_log_entry?
                        if os.path.exists(copyfrom_path):
                            # If the copyfrom_path exists in the working-copy, do a local copy
                            run_svn(["copy", copyfrom_path, path_offset])
                        else:
                            # Else, could be a situation where replay_svn_ancestors() is replaying branch
                            # history and a copy was committed across two revisions: first the deletion
                            # followed by the later add. In such a case, we need to copy from HEAD (dup_rev)
                            # of the path in *target_url*
                            run_svn(["copy", "-r", dup_rev, target_url+"/"+copyfrom_path+"@"+str(dup_rev), path_offset])
            # Else just copy/export the files from the source repo and "svn add" them.
            else:
                # Create (parent) directory if needed
                if d['kind'] == 'dir':
                    p_path = path_offset
                else:
                    p_path = os.path.dirname(path_offset).strip() or '.'
                if not os.path.exists(p_path):
                    os.makedirs(p_path)
                # Export the entire added tree.
                run_svn(["export", "--force", "-r", str(copyfrom_rev),
                         source_repos_url + copyfrom_path + "@" + str(copyfrom_rev), path_offset])
                # TODO: The "no in_svn" condition here is wrong for replace cases.
                # Added the in_svn condition here originally since "svn export" is recursive
                # but "svn log" will have an entry for each indiv file, hence we run into a
                # cannot-re-add-file-which-is-already-added issue.
                if (not in_svn(path_offset)) or (is_replace):
                    run_svn(["add", "--parents", path_offset])
                # TODO: Need to copy SVN properties from source repos

        elif action == 'D':
            # Queue "svn remove" commands, to allow the action == 'A' handling the opportunity
            # to do smart "svn copy" handling on copy/move/renames.
            removed_paths.append(path_offset)

        elif action == 'M':
            if svnlog_verbose:
                print " " + action + " " + d['path']
            # Merge just this revision's change of this one path into the working copy;
            # the captured output is not used afterwards.
            out = run_svn(["merge", "-c", str(svn_rev), "--non-recursive",
                           "--non-interactive", "--accept=theirs-full",
                           source_url+"/"+path_offset+"@"+str(svn_rev), path_offset])

        else:
            display_error("Internal Error: pull_svn_rev: Unhandled 'action' value: '" + action + "'")

    # Now run the deletions which were queued above
    if removed_paths:
        for path_offset in removed_paths:
            if svnlog_verbose:
                print " D " + source_base+"/"+path_offset
            run_svn(["remove", "--force", path_offset])

    if unrelated_paths:
        print "Unrelated paths: (vs. '" + source_base + "')"
        print "*", unrelated_paths

    return commit_paths
717
def pull_svn_rev(log_entry, source_repos_url, source_url, target_url, keep_author=False):
    """
    Pull SVN changes from the given log entry: print a summary of the source
    revision, replay its changes with process_svn_log_entry() and commit
    them with commit_from_svn_log_entry().

    'log_entry' is one of the dicts built by parse_svn_log_xml().
    'source_repos_url', 'source_url' and 'target_url' are passed on to
    process_svn_log_entry(); 'keep_author' is passed on to
    commit_from_svn_log_entry().

    Returns nothing. A failing commit raises ExternalCommandFailed, which is
    left to propagate to the caller.
    """
    svn_rev = log_entry['revision']
    # Some revisions don't have an author (see parse_svn_log_xml)
    author = log_entry['author'] or "(no author)"
    print("\n(Starting source rev #"+str(svn_rev)+":)")
    print("r"+str(svn_rev) + " | " +
          author + " | " +
          str(datetime.fromtimestamp(int(log_entry['date'])).isoformat(' ')))
    print(log_entry['message'])
    print("------------------------------------------------------------------------")
    commit_paths = process_svn_log_entry(log_entry, source_repos_url, source_url, target_url)

    # If we had too many individual paths to commit, wipe the list and just commit at
    # the root of the working copy.
    if len(commit_paths) > 99:
        commit_paths = []

    # TODO: Use SVN properties to track source URL + rev in the target repo?
    # This would provide a more reliable resume-support
    # TODO: Need to re-work the old recovery path which tried to ignore
    # properties conflicts (*.prej files) on files and dirs and then retried
    # the commit. Until then, a failed commit simply propagates.
    commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
    print("(Finished source rev #"+str(svn_rev)+")")
785
786
def main():
    """
    Command-line entry point: replay the history of a source SVN URL into
    a target SVN URL.

    Options:
      -a / --keep-author          passed through as keep_author to the
                                  commit/replay helpers.
      -c / --continue-from-break  resume using the existing "_dup_wc"
                                  working copy; requires -r.
      -r / --svn-rev              source revision to start from (fresh run)
                                  or the revision to resume after (-c).

    In a fresh run the target URL is checked out into "_dup_wc", the source
    tree at the start revision is exported into it and committed, and then
    every later source revision up to the source's current revision is
    replayed via pull_svn_rev().
    """
    usage = "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
    parser = OptionParser(usage)
    parser.add_option("-a", "--keep-author", action="store_true",
                      dest="keep_author", help="Keep revision Author or not")
    parser.add_option("-c", "--continue-from-break", action="store_true",
                      dest="cont_from_break",
                      help="Continue from previous break")
    parser.add_option("-r", "--svn-rev", type="int", dest="svn_rev",
                      help="SVN revision to checkout from")
    (options, args) = parser.parse_args()
    if len(args) != 2:
        display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
                      False)

    source_url = args[0].rstrip("/")
    target_url = args[1].rstrip("/")
    keep_author = bool(options.keep_author)

    # Query the source once: the same info record supplies both the newest
    # revision to replay up to and the repository root URL used later.
    svn_info = get_svn_info(source_url)
    greatest_rev = svn_info['revision']
    # Base URL for the source repos, e.g. u'svn://svn.example.com/svn/repo'
    source_repos_url = svn_info['repos_url']

    dup_wc = "_dup_wc"

    # If the old working copy does not exist, disable continue mode.
    # TODO: Better continue support. Maybe include source repo's rev # in target commit info?
    if not os.path.exists(dup_wc):
        options.cont_from_break = False

    if not options.cont_from_break:
        # Warn if the target SVN URL already exists ("svn list" succeeds).
        cmd = find_program("svn")
        pipe = Popen([cmd, "list", target_url], executable=cmd,
                     stdout=PIPE, stderr=PIPE)
        out = pipe.communicate()[0]
        if pipe.returncode == 0:
            print("Target SVN URL: %s existed!" % target_url)
            if out:
                print(out)
            print("Press 'Enter' to Continue, 'Ctrl + C' to Cancel...")
            print("(Timeout in 5 seconds)")
            # Wait for input on stdin, but carry on after 5 seconds anyway.
            select.select([sys.stdin], [], [], 5)

        # Get log entry for the SVN revision we will check out
        if options.svn_rev:
            # If a rev was specified, get the log entry just before or at rev
            svn_start_log = get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
        else:
            # Otherwise, get log entry of branch creation
            # TODO: This call is *very* expensive on a repo with lots of revisions.
            #       Even though the call is passing --limit 1, it seems like that limit-filter
            #       is happening after SVN has fetched the full log history.
            svn_start_log = get_first_svn_log_entry(source_url, 1, greatest_rev, False)

        # This is the revision we will start from for source_url
        svn_rev = svn_start_log['revision']

        # Check out a fresh working copy of target_url
        dup_wc = os.path.abspath(dup_wc)
        if os.path.exists(dup_wc):
            shutil.rmtree(dup_wc)
        svn_checkout(target_url, dup_wc)
        os.chdir(dup_wc)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        rev_str = str(svn_rev)
        listing = run_svn(["list", "-r", rev_str, source_url + "@" + rev_str])
        for path in listing.strip("\n").split("\n"):
            if not path:
                # Skip null lines (e.g. an empty listing yields a single "")
                continue
            # Directories have a trailing slash in the "svn list" output
            if path[-1] == "/":
                path = path.rstrip('/')
                if not os.path.exists(path):
                    os.makedirs(path)
            run_svn(["export", "--force", "-r", rev_str,
                     source_url + "/" + path + "@" + rev_str, path])
            run_svn(["add", path])
        commit_from_svn_log_entry(svn_start_log, [], keep_author)
    else:
        dup_wc = os.path.abspath(dup_wc)
        os.chdir(dup_wc)
        # TODO: Need better resume support. For the time being, expect caller explictly passes in resume revision.
        svn_rev = options.svn_rev
        # Check for a missing -r explicitly instead of relying on how None
        # happens to compare against an integer.
        if svn_rev is None or svn_rev < 1:
            display_error("Invalid arguments\n\nNeed to pass result rev # (-r) when using continue-mode (-c)", False)

    # Load SVN log starting from svn_rev + 1
    it_log_entries = iter_svn_log_entries(source_url, svn_rev + 1, greatest_rev)

    try:
        for log_entry in it_log_entries:
            # Replay this revision from source_url into target_url
            pull_svn_rev(log_entry, source_repos_url, source_url, target_url, keep_author)
            # Update our target working-copy, to ensure everything says it's at the new HEAD revision
            run_svn(["up", dup_wc])

    except KeyboardInterrupt:
        print("\nStopped by user.")
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
    except:
        # Deliberately catch-all: whatever stopped the replay, report it and
        # revert the uncommitted changes left in the working copy.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
    finally:
        run_svn(["up"])
        print("\nFinished!")
907
908
# Run the replay only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
911
912 # vim:sts=4:sw=4: