1 #!/usr/bin/env python
2 """
3 svn2svn.py
4
5 Replicate (replay) changesets from one SVN repository to another:
6 * Maintains full logical history (e.g. uses "svn copy" for renames).
7 * Maintains original commit messages.
8 * Cannot maintain original commit dates, but appends the original commit date
9 to each commit message: "Date: %d".
10 * Optionally maintain source author info. (Only supported if accessing
11 target SVN repo via file://)
12 * Optionally run an external shell script before each replayed commit
13 to give the ability to dynamically exclude or modify files as part
14 of the replay.
15
16 License: GPLv2, the same as hgsvn.
17 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
18 (This is a forked and modified version of http://code.google.com/p/svn2svn/)
19 """
20
21 import os
22 import sys
23 import time
24 import locale
25 import shutil
26 import select
27 import calendar
28 import traceback
29
30 from optparse import OptionParser
31 from subprocess import Popen, PIPE
32 from datetime import datetime
33
34 try:
35 from xml.etree import cElementTree as ET
36 except ImportError:
37 try:
38 from xml.etree import ElementTree as ET
39 except ImportError:
40 try:
41 import cElementTree as ET
42 except ImportError:
43 from elementtree import ElementTree as ET
44
45 svn_log_args = ['log', '--xml']
46 svn_info_args = ['info', '--xml']
47 svn_checkout_args = ['checkout', '-q']
48 svn_status_args = ['status', '--xml', '-v', '--ignore-externals']
49
50 # Setup debug options
51 debug = False
52 debug_runsvn_timing = False # Display how long each "svn" OS command took to run?
53 # Setup verbosity options
54 runsvn_showcmd = False # Display every "svn" OS command we run?
55 runsvn_showout = False # Display the stdout results from every "svn" OS command we run?
56 svnlog_verbose = True # Display each action + changed-path as we walk the history?
57
58 # define exception class
59 class ExternalCommandFailed(RuntimeError):
60 """
61 An external command failed.
62 """
63
64 def display_error(message, raise_exception = True):
65 """
66 Display error message, then terminate.
67 """
68 print "Error:", message
69 print
70 if raise_exception:
71 raise ExternalCommandFailed
72 else:
73 sys.exit(1)
74
75 # Windows compatibility code by Bill Baxter
76 if os.name == "nt":
77 def find_program(name):
78 """
79 Find the name of the program for Popen.
80 Windows is finicky about having the complete file name. Popen
81 won't search the %PATH% for you automatically.
82 (Adapted from ctypes.find_library)
83 """
84 # See MSDN for the REAL search order.
85 base, ext = os.path.splitext(name)
86 if ext:
87 exts = [ext]
88 else:
89 exts = ['.bat', '.exe']
90 for directory in os.environ['PATH'].split(os.pathsep):
91 for e in exts:
92 fname = os.path.join(directory, base + e)
93 if os.path.exists(fname):
94 return fname
95 return None
96 else:
97 def find_program(name):
98 """
99 Find the name of the program for Popen.
100 On Unix, popen isn't picky about having absolute paths.
101 """
102 return name
103
104 def shell_quote(s):
105 if os.name == "nt":
106 q = '"'
107 else:
108 q = "'"
109 return q + s.replace('\\', '\\\\').replace("'", "'\"'\"'") + q
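# Illustrative example of the quoting above (POSIX branch; the input string is
# made up): shell_quote("it's") returns 'it'"'"'s' , which a shell parses back
# to the original it's .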
110
111 locale_encoding = locale.getpreferredencoding()
112
113 def run_svn(args, fail_if_stderr=False, encoding="utf-8"):
114 """
115 Run the given svn command in a subprocess (via PIPE).
116 Exit if the svn command fails.
117 """
118 def _transform_arg(a):
119 if isinstance(a, unicode):
120 a = a.encode(encoding or locale_encoding)
121 elif not isinstance(a, str):
122 a = str(a)
123 return a
124 t_args = map(_transform_arg, args)
125
126 cmd = find_program("svn")
127 cmd_string = str(" ".join(map(shell_quote, [cmd] + t_args)))
128 if runsvn_showcmd:
129 print "$", "("+os.getcwd()+")", cmd_string
130 if debug_runsvn_timing:
131 time1 = time.time()
132 pipe = Popen([cmd] + t_args, executable=cmd, stdout=PIPE, stderr=PIPE)
133 out, err = pipe.communicate()
134 if debug_runsvn_timing:
135 time2 = time.time()
136 print "(" + str(round(time2-time1,4)) + " elapsed)"
137 if out and runsvn_showout:
138 print out
139 if pipe.returncode != 0 or (fail_if_stderr and err.strip()):
140 display_error("External program failed (return code %d): %s\n%s"
141 % (pipe.returncode, cmd_string, err))
142 return out
143
144 def svn_date_to_timestamp(svn_date):
145 """
146 Parse an SVN date as read from the XML output and
147 return the corresponding timestamp.
148 """
149 # Strip microseconds and timezone (always UTC, hopefully)
150 # XXX there are various ISO datetime parsing routines out there,
151 # cf. http://seehuhn.de/comp/pdate
152 date = svn_date.split('.', 2)[0]
153 time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
154 return calendar.timegm(time_tuple)
155
156 def parse_svn_info_xml(xml_string):
157 """
158 Parse the XML output from an "svn info" command and extract
159 useful information as a dict.
160 """
161 d = {}
162 tree = ET.fromstring(xml_string)
163 entry = tree.find('.//entry')
164 if entry:
165 d['url'] = entry.find('url').text
166 d['revision'] = int(entry.get('revision'))
167 d['repos_url'] = tree.find('.//repository/root').text
168 d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
169 d['kind'] = entry.get('kind')
170 return d
171
172 def parse_svn_log_xml(xml_string):
173 """
174 Parse the XML output from an "svn log" command and extract
175 useful information as a list of dicts (one per log changeset).
176 """
177 l = []
178 tree = ET.fromstring(xml_string)
179 for entry in tree.findall('logentry'):
180 d = {}
181 d['revision'] = int(entry.get('revision'))
182 # Some revisions don't have authors, most notably
183 # the first revision in a repository.
184 author = entry.find('author')
185 d['author'] = author is not None and author.text or None
186 d['date'] = svn_date_to_timestamp(entry.find('date').text)
187 # Some revisions may have empty commit message
188 message = entry.find('msg')
189 message = message is not None and message.text is not None \
190 and message.text.strip() or ""
191 # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
192 d['message'] = message.replace('\r\n', '\n').replace('\n\r', '\n'). \
193 replace('\r', '\n')
194 paths = d['changed_paths'] = []
195 for path in entry.findall('.//path'):
196 copyfrom_rev = path.get('copyfrom-rev')
197 if copyfrom_rev:
198 copyfrom_rev = int(copyfrom_rev)
199 paths.append({
200 'path': path.text,
201 'kind': path.get('kind'),
202 'action': path.get('action'),
203 'copyfrom_path': path.get('copyfrom-path'),
204 'copyfrom_revision': copyfrom_rev,
205 })
206 # Need to sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
207 # can process actions in depth-first order.
208 paths.sort()
209 l.append(d)
210 return l
211
212 def parse_svn_status_xml(xml_string, base_dir=None):
213 """
214 Parse the XML output from an "svn status" command and extract
215 useful info as a list of dicts (one per status entry).
216 """
217 l = []
218 tree = ET.fromstring(xml_string)
219 for entry in tree.findall('.//entry'):
220 d = {}
221 path = entry.get('path')
222 if base_dir is not None:
223 assert path.startswith(base_dir)
224 path = path[len(base_dir):].lstrip('/\\')
225 d['path'] = path
226 wc_status = entry.find('wc-status')
227 if wc_status.get('item') == 'external':
228 d['type'] = 'external'
229 elif wc_status.get('revision') is not None:
230 d['type'] = 'normal'
231 else:
232 d['type'] = 'unversioned'
233 l.append(d)
234 return l
235
236 def get_svn_info(svn_url_or_wc, rev_number=None):
237 """
238 Get SVN information for the given URL or working copy,
239 with an optionally specified revision number.
240 Returns a dict as created by parse_svn_info_xml().
241 """
242 if rev_number is not None:
243 args = [svn_url_or_wc + "@" + str(rev_number)]
244 else:
245 args = [svn_url_or_wc]
246 xml_string = run_svn(svn_info_args + args, fail_if_stderr=True)
247 return parse_svn_info_xml(xml_string)
248
249 def svn_checkout(svn_url, checkout_dir, rev_number=None):
250 """
251 Checkout the given URL at an optional revision number.
252 """
253 args = []
254 if rev_number is not None:
255 args += ['-r', rev_number]
256 args += [svn_url, checkout_dir]
257 return run_svn(svn_checkout_args + args)
258
259 def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True):
260 """
261 Fetch up to 'limit' SVN log entries between the given revisions.
262 """
263 if stop_on_copy:
264 args = ['--stop-on-copy']
265 else:
266 args = []
267 url = str(svn_url_or_wc)
268 if rev_start != 'HEAD' and rev_end != 'HEAD':
269 args += ['-r', '%s:%s' % (rev_start, rev_end)]
270 if not "@" in svn_url_or_wc:
271 url += "@" + str(max(rev_start, rev_end))
272 if get_changed_paths:
273 args += ['-v']
274 args += ['--limit', str(limit), url]
275 xml_string = run_svn(svn_log_args + args)
276 return parse_svn_log_xml(xml_string)
277
278 def get_svn_status(svn_wc, flags=None):
279 """
280 Get SVN status information about the given working copy.
281 """
282 # Ensure proper stripping by canonicalizing the path
283 svn_wc = os.path.abspath(svn_wc)
284 args = []
285 if flags:
286 args += [flags]
287 args += [svn_wc]
288 xml_string = run_svn(svn_status_args + args)
289 return parse_svn_status_xml(xml_string, svn_wc)
290
291 def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True):
292 """
293 Get the first SVN log entry in the requested revision range.
294 """
295 entries = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths)
296 if not entries:
297 display_error("No SVN log for %s between revisions %s and %s" %
298 (svn_url, rev_start, rev_end))
299
300 return entries[0]
301
302 def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
303 """
304 Get the first log entry after/at the given revision number in an SVN branch.
305 By default the revision number is set to 0, which will give you the log
306 entry corresponding to the branch creation.
307
308 NOTE: to know whether the branch creation corresponds to an SVN import or
309 a copy from another branch, inspect elements of the 'changed_paths' entry
310 in the returned dictionary.
311 """
312 return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True, get_changed_paths=True)
313
314 def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
315 """
316 Get the last log entry before/at the given revision number in an SVN branch.
317 By default the revision number is set to HEAD, which will give you the log
318 entry corresponding to the latest commit in the branch.
319 """
320 return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True, get_changed_paths=True)
321
322
323 log_duration_threshold = 10.0
324 log_min_chunk_length = 10
325
326 def iter_svn_log_entries(svn_url, first_rev, last_rev):
327 """
328 Iterate over SVN log entries between first_rev and last_rev.
329
330 This function features chunked log fetching so that it isn't too nasty
331 to the SVN server if many entries are requested.
332 """
333 cur_rev = first_rev
334 chunk_length = log_min_chunk_length
335 chunk_interval_factor = 1.0
336 while last_rev == "HEAD" or cur_rev <= last_rev:
337 start_t = time.time()
338 stop_rev = min(last_rev, cur_rev + int(chunk_length * chunk_interval_factor))
339 entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length)
340 duration = time.time() - start_t
341 if not entries:
342 if stop_rev == last_rev:
343 break
344 cur_rev = stop_rev + 1
345 chunk_interval_factor *= 2.0
346 continue
347 for e in entries:
348 yield e
349 cur_rev = e['revision'] + 1
350 # Adapt chunk length based on measured request duration
351 if duration < log_duration_threshold:
352 chunk_length = int(chunk_length * 2.0)
353 elif duration > log_duration_threshold * 2:
354 chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
355
356 def commit_from_svn_log_entry(entry, files=None, keep_author=False):
357 """
358 Given an SVN log entry and an optional sequence of files, do an svn commit.
359 """
360 # TODO: Run optional external shell hook here, for doing pre-commit filtering
361 # This will use the local timezone for displaying commit times
362 timestamp = int(entry['date'])
363 svn_date = str(datetime.fromtimestamp(timestamp))
364 # Uncomment this one if you prefer UTC commit times
365 #svn_date = "%d 0" % timestamp
366 if keep_author:
367 options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date, "--username", entry['author']]
368 else:
369 options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date + "\nAuthor: " + entry['author']]
370 if files:
371 options += list(files)
372 print "(Committing source rev #"+str(entry['revision'])+"...)"
373 run_svn(options)
374
375 def in_svn(p):
376 """
377 Check if a given file/folder is being tracked by Subversion.
378 Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
379 With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of the working-copy.
380 Use "svn status" to check the status of the file/folder.
381 """
382 # TODO: Is there a better way to do this?
383 entries = get_svn_status(p)
384 if not entries:
385 return False
386 d = entries[0]
387 return (d['type'] == 'normal')
388
389 def find_svn_ancestors(source_repos_url, source_base, source_offset, copyfrom_path, copyfrom_rev):
390 """
391 Given a copy-from path (copyfrom_path), walk the SVN history backwards to inspect
392 the ancestry of that path. Build a collection of copyfrom_path+revision pairs
393 for each of the branch-copies since the initial branch-creation. If we find a
394 copyfrom_path of which source_base is a prefix (e.g. we crawled back to
395 the initial branch-copy from trunk), then return the collection of ancestor paths.
396 Otherwise, copyfrom_path has no ancestry compared to source_base.
397
398 This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
399 file/folder was renamed in a branch and then that branch was merged back to trunk.
400
401 PARAMETERS:
402 * source_repos_url = Full URL to root of repository, e.g. 'file:///path/to/repos'
403 * source_base = e.g. '/trunk'
404 * source_offset = e.g. 'projectA/file1.txt'
405 * copyfrom_path = e.g. '/branches/bug123/projectA/file1.txt'
406 """
407
408 done = False
409 working_path = copyfrom_path
410 working_base = copyfrom_path[:-len(source_offset)].rstrip('/')
411 working_offset = source_offset.strip('/')
412 working_rev = copyfrom_rev
413 ancestors = [{'path': [working_base, working_offset], 'revision': working_rev}]
414 while not done:
415 # Get the first "svn log" entry for this path (relative to @rev)
416 #working_path = working_base + "/" + working_offset
417 if debug:
418 print ">> find_svn_ancestors: " + source_repos_url + working_path + "@" + str(working_rev) + \
419 " (" + working_base + " " + working_offset + ")"
420 log_entry = get_first_svn_log_entry(source_repos_url + working_path + "@" + str(working_rev), 1, str(working_rev), True)
421 if not log_entry:
422 done = True
423 # Find the action for our working_path in this revision
424 for d in log_entry['changed_paths']:
425 path = d['path']
426 if not path in working_path:
427 continue
428 # Check action-type for this file
429 action = d['action']
430 if action not in 'MARD':
431 display_error("In SVN rev. %d: action '%s' not supported. \
432 Please report a bug!" % (log_entry['revision'], action))
433 if debug:
434 debug_desc = ": " + action + " " + path
435 if d['copyfrom_path']:
436 debug_desc += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
437 print debug_desc
438
439 if action == 'R':
440 # If file/folder was replaced, it has no ancestor
441 return []
442 if action == 'D':
443 # If file/folder was deleted, it has no ancestor
444 return []
445 if action == 'A':
446 # If file/folder was added but not a copy, it has no ancestor
447 if not d['copyfrom_path']:
448 return []
449 # Else, file/folder was added and is a copy, so check ancestors
450 path_old = d['copyfrom_path']
451 working_path = working_path.replace(path, path_old)
452 if working_base in working_path:
453 # If the new and old working_path share the same working_base, just need to update working_offset.
454 working_offset = working_path[len(working_base)+1:]
455 else:
456 # Else, assume that working_base has changed but working_offset is the same, e.g. a re-branch.
457 # TODO: Is this a safe assumption?!
458 working_base = working_path[:-len(working_offset)].rstrip('/')
459 working_rev = d['copyfrom_revision']
460 if debug:
461 print ">> find_svn_ancestors: copy-from: " + working_base + " " + working_offset + "@" + str(working_rev)
462 ancestors.append({'path': [working_base, working_offset], 'revision': working_rev})
463 # If we found a copy-from case which matches our source_base, we're done
464 if (path_old == source_base) or (path_old.startswith(source_base + "/")):
465 return ancestors
466 # Else, follow the copy and keep on searching
467 break
468 return None
469
470 def replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url):
471 """
472 Given an array of ancestor info (find_svn_ancestors), replay the history
473 to correctly track renames ("svn copy/move") across branch-merges.
474
475 For example, consider a sequence of events like this:
476 1. svn copy /trunk /branches/fix1
477 2. (Make some changes on /branches/fix1)
478 3. svn copy /branches/fix1/Proj1 /branches/fix1/Proj2   # Rename folder
479 4. svn copy /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt   # Rename file inside renamed folder
480 5. svn co /trunk && svn merge /branches/fix1
481 After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
482 and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
483 to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
484 we'd lose the logical history that Proj2/file2.txt is really a descendant
485 of Proj1/file1.txt.
486
487 'source_repos_url' is the full URL to the root of the source repository.
488 'ancestors' is the array returned by find_svn_ancestors() with the final
489 destination info appended to it by process_svn_log_entry().
490 'source_url' and 'target_url' are the full URLs to the source/target paths.
491 """
492 # Ignore ancestors[0], which is the original (pre-branch-copy) trunk path
493 # Ignore ancestors[1], which is the original branch-creation commit
494 # Ignore ancestors[n], which is the final commit back to trunk
495 for idx in range(1, len(ancestors)-1):
496 ancestor = ancestors[idx]
497 source_base = ancestor['path'][0]
498 source_offset = ancestor['path'][1]
499 source_path = source_base + "/" + source_offset
500 source_rev = ancestor['revision']
501 source_rev_next = ancestors[idx+1]['revision']
502 # Do a "svn log" on the _parent_ directory of source_path, since trying to get log info
503 # for the "old path" on the revision where the copy/move happened will fail.
504 if "/" in source_path:
505 p_source_path = source_path[:source_path.rindex('/')]
506 else:
507 p_source_path = ""
508 if debug:
509 print ">> replay_svn_ancestors: ["+str(idx)+"]" + source_path+"@"+str(source_rev) + " ["+p_source_path+"@"+str(source_rev)+":"+str(source_rev_next-1)+"]"
510 it_log_entries = iter_svn_log_entries(source_repos_url+p_source_path, source_rev, source_rev_next-1)
511 for log_entry in it_log_entries:
512 #print ">> replay_svn_ancestors: log_entry: (" + source_repos_url+source_base + ")"
513 #print log_entry
514 # TODO: Hit a problem case with a rename-situation where the "remove" was committed ahead of the "add (copy)".
515 # Do we maybe need to buffer all the remove's until the end of the entire replay session?
516 # Or can we maybe work around this by passing an explicit rev # into "svn copy"?
517 process_svn_log_entry(log_entry, source_repos_url, source_repos_url+source_base, target_url)
518
519 def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url):
520 """
521 Process SVN changes from the given log entry.
522 Returns an array of all the paths in the working-copy that were changed,
523 i.e. the paths which need to be passed to "svn commit".
524
525 'log_entry' is the array structure built by parse_svn_log_xml().
526 'source_repos_url' is the full URL to the root of the source repository.
527 'source_url' is the full URL to the source path in the source repository.
528 'target_url' is the full URL to the target path in the target repository.
529 """
530 # Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
531 source_base = source_url[len(source_repos_url):]
532 if debug:
533 print ">> process_svn_log_entry: " + source_url + " (" + source_base + ")"
534
535 svn_rev = log_entry['revision']
536 # Get current target revision, for "svn copy" support
537 dup_info = get_svn_info(target_url)
538 dup_rev = dup_info['revision']
539
540 removed_paths = []
541 modified_paths = []
542 unrelated_paths = []
543 commit_paths = []
544
545 for d in log_entry['changed_paths']:
546 # Get the full path for this changed_path
547 # e.g. u'/branches/bug123/projectA/file1.txt'
548 path = d['path']
549 if not path.startswith(source_base + "/"):
550 # Ignore changed files that are not part of this subdir
551 if path != source_base:
552 print ">> process_svn_log_entry: Unrelated path: " + path + " (" + source_base + ")"
553 unrelated_paths.append(path)
554 continue
555 # Calculate the offset (based on source_base) for this changed_path
556 # e.g. u'projectA/file1.txt'
557 # (path = source_base + "/" + path_offset)
558 path_offset = path[len(source_base):].strip("/")
559 # Get the action for this path
560 action = d['action']
561 if action not in 'MARD':
562 display_error("In SVN rev. %d: action '%s' not supported. \
563 Please report a bug!" % (svn_rev, action))
564
565 # Try to be efficient and keep track of an explicit list of paths in the
566 # working copy that changed. If we commit from the root of the working copy,
567 # then SVN needs to crawl the entire working copy looking for pending changes.
568 # But, if we gather too many paths to commit, then we wipe commit_paths below
569 # and end-up doing a commit at the root of the working-copy.
570 if len (commit_paths) < 100:
571 commit_paths.append(path_offset)
572
573 # Special-handling for replace's
574 is_replace = False
575 if action == 'R':
576 if svnlog_verbose:
577 msg = " " + d['action'] + " " + d['path']
578 if d['copyfrom_path']:
579 msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
580 print msg
581 # If file was "replaced" (deleted then re-added, all in same revision),
582 # then we need to run the "svn rm" first, then change action='A'. This
583 # lets the normal code below handle re-"svn add"'ing the files. This
584 # should replicate the "replace".
585 run_svn(["up", path_offset])
586 run_svn(["remove", "--force", path_offset])
587 action = 'A'
588 is_replace = True
589
590 # Handle all the various action-types
591 # (Handle "add" first, for "svn copy/move" support)
592 if action == 'A':
593 if svnlog_verbose:
594 msg = " " + d['action'] + " " + d['path']
595 if d['copyfrom_path']:
596 msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
597 print msg
598 # Determine where to export from
599 copyfrom_rev = svn_rev
600 copyfrom_path = path
601 svn_copy = False
602 # Handle cases where this "add" was a copy from another URL in the source repos
603 if d['copyfrom_revision']:
604 copyfrom_rev = d['copyfrom_revision']
605 copyfrom_path = d['copyfrom_path']
606 if debug:
607 print ">> process_svn_log_entry: copy-to: " + source_base + " " + path_offset
608 if source_base in copyfrom_path:
609 # If the copy-from path is inside the current working-copy, no need to check ancestry.
610 ancestors = []
611 copyfrom_path = copyfrom_path[len(source_base):].strip("/")
612 if debug:
613 print ">> process_svn_log_entry: Found copy: " + copyfrom_path+"@"+str(copyfrom_rev)
614 svn_copy = True
615 else:
616 ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset,
617 copyfrom_path, copyfrom_rev)
618 if ancestors:
619 # Reverse the list, so that we loop in chronological order
620 ancestors.reverse()
621 # Append the current revision
622 ancestors.append({'path': [source_base, path_offset], 'revision': svn_rev})
623 # ancestors[0] is the original (pre-branch-copy) trunk path.
624 # ancestors[1] is the first commit on the new branch.
625 copyfrom_rev = ancestors[0]['revision']
626 copyfrom_base = ancestors[0]['path'][0]
627 copyfrom_offset = ancestors[0]['path'][1]
628 copyfrom_path = copyfrom_base + copyfrom_offset
629 if debug:
630 print ">> process_svn_log_entry: FOUND PARENT:"
631 for idx in range(0,len(ancestors)):
632 ancestor = ancestors[idx]
633 print " ["+str(idx)+"] " + ancestor['path'][0]+" "+ancestor['path'][1]+"@"+str(ancestor['revision'])
634 #print ">> process_svn_log_entry: copyfrom_path (before): " + copyfrom_path + " source_base: " + source_base + " p: " + p
635 copyfrom_path = copyfrom_path[len(source_base):].strip("/")
636 #print ">> process_svn_log_entry: copyfrom_path (after): " + copyfrom_path
637 svn_copy = True
638 # If this add was a copy-from, do a smart replay of the ancestors' history.
639 # Else just copy/export the files from the source repo and "svn add" them.
640 if svn_copy:
641 if debug:
642 print ">> process_svn_log_entry: svn_copy: copy-from: " + copyfrom_path+"@"+str(copyfrom_rev) + " source_base: "+source_base + " len(ancestors): " + str(len(ancestors))
643 # If we don't have any ancestors, then this is just a straight "svn copy" in the current working-copy.
644 if not ancestors:
645 # ...but not if the target is already tracked, because this might run several times for the same path.
646 # TODO: Is there a better way to avoid recursion bugs? Maybe a collection of processed paths?
647 if not in_svn(path_offset):
648 run_svn(["copy", "-r", dup_rev, target_url+"/"+copyfrom_path+"@"+str(dup_rev), path_offset])
649 else:
650 if d['kind'] == 'dir':
651 # Replay any actions which happened to this folder from the ancestor path(s).
652 replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url)
653 else:
654 # Just do a straight "svn copy" for files. There isn't any kind of "dependent"
655 # history we might need to replay like for folders.
656 run_svn(["copy", "-r", dup_rev, target_url+"/"+copyfrom_path+"@"+str(dup_rev), path_offset])
657 else:
658 # Create (parent) directory if needed
659 if d['kind'] == 'dir':
660 p_path = path_offset
661 else:
662 p_path = os.path.dirname(path_offset).strip() or '.'
663 if not os.path.exists(p_path):
664 os.makedirs(p_path)
665 # Export the entire added tree.
666 run_svn(["export", "--force", "-r", str(copyfrom_rev),
667 source_repos_url + copyfrom_path + "@" + str(copyfrom_rev), path_offset])
668 # TODO: The "no in_svn" condition here is wrong for replace cases.
669 # Added the in_svn condition here originally since "svn export" is recursive
670 # but "svn log" will have an entry for each indiv file, hence we run into a
671 # cannot-re-add-file-which-is-already-added issue.
672 if (not in_svn(path_offset)) or (is_replace):
673 run_svn(["add", "--parents", path_offset])
674 # TODO: Need to copy SVN properties from source repos
675
676 elif action == 'D':
677 # Queue "svn remove" commands, to give the action == 'A' handling the opportunity
678 # to do smart "svn copy" handling of copy/move/renames.
679 removed_paths.append(path_offset)
680
681 elif action == 'R':
682 # TODO
683 display_error("Internal Error: Handling for action='R' not implemented yet.")
684
685 elif action == 'M':
686 modified_paths.append(path_offset)
687
688 else:
689 display_error("Internal Error: pull_svn_rev: Unhandled 'action' value: '" + action + "'")
690
691 if removed_paths:
692 for r in removed_paths:
693 if svnlog_verbose:
694 print " D " + r
695 # TODO: Is the "svn up" here needed?
696 run_svn(["up", r])
697 run_svn(["remove", "--force", r])
698
699 if modified_paths:
700 for m in modified_paths:
701 if svnlog_verbose:
702 print " M " + m
703 # TODO: Is the "svn up" here needed?
704 run_svn(["up", m])
705 m_url = source_url + "/" + m
706 out = run_svn(["merge", "-c", str(svn_rev), "--non-recursive",
707 "--non-interactive", "--accept=theirs-full",
708 m_url+"@"+str(svn_rev), m])
709
710 if unrelated_paths:
711 print "Unrelated paths: (vs. '" + source_base + "')"
712 print "*", unrelated_paths
713
714 return commit_paths
715
716 def pull_svn_rev(log_entry, source_repos_url, source_url, target_url, keep_author=False):
717 """
718 Pull SVN changes from the given log entry.
719 Returns the new SVN revision.
720 If an exception occurs, it will rollback to revision 'svn_rev - 1'.
721 """
722 ## Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
723 #source_base = source_url[len(source_repos_url):]
724
725 svn_rev = log_entry['revision']
726 print "\n(Starting source rev #"+str(svn_rev)+":)"
727 print "r"+str(log_entry['revision']) + " | " + \
728 log_entry['author'] + " | " + \
729 str(datetime.fromtimestamp(int(log_entry['date'])).isoformat(' '))
730 print log_entry['message']
731 print "------------------------------------------------------------------------"
732 commit_paths = process_svn_log_entry(log_entry, source_repos_url, source_url, target_url)
733
734 # If we had too many individual paths to commit, wipe the list and just commit at
735 # the root of the working copy.
736 if len (commit_paths) > 99:
737 commit_paths = []
738
739 # TODO: Use SVN properties to track source URL + rev in the target repo?
740 # This would provide a more reliable resume-support
741 try:
742 commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
743 except ExternalCommandFailed:
744 # try to ignore the Properties conflicts on files and dirs
745 # use the copy from original_wc
746 # TODO: Need to re-work this?
747 #has_Conflict = False
748 #for d in log_entry['changed_paths']:
749 # p = d['path']
750 # p = p[len(source_base):].strip("/")
751 # if os.path.isfile(p):
752 # if os.path.isfile(p + ".prej"):
753 # has_Conflict = True
754 # shutil.copy(original_wc + os.sep + p, p)
755 # p2=os.sep + p.replace('_', '__').replace('/', '_') \
756 # + ".prej-" + str(svn_rev)
757 # shutil.move(p + ".prej", os.path.dirname(original_wc) + p2)
758 # w="\n### Properties conflicts ignored:"
759 # print "%s %s, in revision: %s\n" % (w, p, svn_rev)
760 # elif os.path.isdir(p):
761 # if os.path.isfile(p + os.sep + "dir_conflicts.prej"):
762 # has_Conflict = True
763 # p2=os.sep + p.replace('_', '__').replace('/', '_') \
764 # + "_dir__conflicts.prej-" + str(svn_rev)
765 # shutil.move(p + os.sep + "dir_conflicts.prej",
766 # os.path.dirname(original_wc) + p2)
767 # w="\n### Properties conflicts ignored:"
768 # print "%s %s, in revision: %s\n" % (w, p, svn_rev)
769 # out = run_svn(["propget", "svn:ignore",
770 # original_wc + os.sep + p])
771 # if out:
772 # run_svn(["propset", "svn:ignore", out.strip(), p])
773 # out = run_svn(["propget", "svn:externals",
774 # original_wc + os.sep + p])
775 # if out:
776 # run_svn(["propset", "svn:externals", out.strip(), p])
777 ## try again
778 #if has_Conflict:
779 # commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
780 #else:
781 raise ExternalCommandFailed
782 print "(Finished source rev #"+str(svn_rev)+")"
783
784
785 def main():
786 usage = "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
787 parser = OptionParser(usage)
788 parser.add_option("-a", "--keep-author", action="store_true",
789 dest="keep_author", help="Keep revision Author or not")
790 parser.add_option("-c", "--continue-from-break", action="store_true",
791 dest="cont_from_break",
792 help="Continue from previous break")
793 parser.add_option("-r", "--svn-rev", type="int", dest="svn_rev",
794 help="SVN revision to checkout from")
795 (options, args) = parser.parse_args()
796 if len(args) != 2:
797 display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
798 False)
799
800 source_url = args.pop(0).rstrip("/")
801 target_url = args.pop(0).rstrip("/")
802 if options.keep_author:
803 keep_author = True
804 else:
805 keep_author = False
806
807 # Find the greatest_rev in the source repo
808 svn_info = get_svn_info(source_url)
809 greatest_rev = svn_info['revision']
810
811 dup_wc = "_dup_wc"
812
813 # if old working copy does not exist, disable continue mode
814 # TODO: Better continue support. Maybe include source repo's rev # in target commit info?
815 if not os.path.exists(dup_wc):
816 options.cont_from_break = False
817
818 if not options.cont_from_break:
819 # Warn if the target SVN URL already exists
820 cmd = find_program("svn")
821 pipe = Popen([cmd] + ["list"] + [target_url], executable=cmd,
822 stdout=PIPE, stderr=PIPE)
823 out, err = pipe.communicate()
824 if pipe.returncode == 0:
825 print "Target SVN URL %s already exists!" % target_url
826 if out:
827 print out
828 print "Press 'Enter' to Continue, 'Ctrl + C' to Cancel..."
829 print "(Timeout in 5 seconds)"
830 rfds, wfds, efds = select.select([sys.stdin], [], [], 5)
831
832 # Get log entry for the SVN revision we will check out
833 if options.svn_rev:
834 # If a rev was specified, get the log entry just before or at that rev
835 svn_start_log = get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
836 else:
837 # Otherwise, get log entry of branch creation
838 # TODO: This call is *very* expensive on a repo with lots of revisions.
839 # Even though the call is passing --limit 1, it seems like that limit-filter
840 # is happening after SVN has fetched the full log history.
841 svn_start_log = get_first_svn_log_entry(source_url, 1, greatest_rev, False)
842
843 # This is the revision we will start from for source_url
844 svn_rev = svn_start_log['revision']
845
846 # Check out a working copy of target_url
847 dup_wc = os.path.abspath(dup_wc)
848 if os.path.exists(dup_wc):
849 shutil.rmtree(dup_wc)
850 svn_checkout(target_url, dup_wc)
851 os.chdir(dup_wc)
852
853 # For the initial commit to the target URL, export all the contents from
854 # the source URL at the start-revision.
855 paths = run_svn(["list", "-r", str(svn_rev), source_url+"@"+str(svn_rev)])
856 paths = paths.strip("\n").split("\n")
857 for path in paths:
858 if not path:
859 # Skip null lines
860 break
861 # Directories have a trailing slash in the "svn list" output
862 if path[-1] == "/":
863 path=path.rstrip('/')
864 if not os.path.exists(path):
865 os.makedirs(path)
866 run_svn(["export", "--force", "-r" , str(svn_rev), source_url+"/"+path+"@"+str(svn_rev), path])
867 run_svn(["add", path])
868 commit_from_svn_log_entry(svn_start_log, [], keep_author)
869 else:
870 dup_wc = os.path.abspath(dup_wc)
871 os.chdir(dup_wc)
872 # TODO: Need better resume support. For the time being, expect the caller to explicitly pass in the resume revision.
873 svn_rev = options.svn_rev
874 if svn_rev < 1:
875 display_error("Invalid arguments\n\nNeed to pass result rev # (-r) when using continue-mode (-c)", False)
876
877
878 # Get SVN info
879 svn_info = get_svn_info(source_url)
880 # Get the base URL for the source repos, e.g. u'svn://svn.example.com/svn/repo'
881 source_repos_url = svn_info['repos_url']
882
883 # Load SVN log starting from svn_rev + 1
884 it_log_entries = iter_svn_log_entries(source_url, svn_rev + 1, greatest_rev)
885
886 try:
887 for log_entry in it_log_entries:
888 # Replay this revision from source_url into target_url
889 pull_svn_rev(log_entry, source_repos_url, source_url, target_url, keep_author)
890 # Update our target working-copy, to ensure everything says it's at the new HEAD revision
891 run_svn(["up", dup_wc])
892
893 except KeyboardInterrupt:
894 print "\nStopped by user."
895 run_svn(["cleanup"])
896 run_svn(["revert", "--recursive", "."])
897 except:
898 print "\nCommand failed with following error:\n"
899 traceback.print_exc()
900 run_svn(["cleanup"])
901 run_svn(["revert", "--recursive", "."])
902 finally:
903 run_svn(["up"])
904 print "\nFinished!"
905
906
907 if __name__ == "__main__":
908 main()
909
910 # vim:sts=4:sw=4: