1 #!/usr/bin/env python
2 """
3 svn2svn.py
4
5 Replicate (replay) changesets from one SVN repository to another:
6 * Maintains full logical history (e.g. uses "svn copy" for renames).
7 * Maintains original commit messages.
8 * Cannot maintain original commit date, but appends the original commit date
9   to each commit message: "Date: %d".
10 * Optionally maintain source author info. (Only supported if accessing
11 target SVN repo via file://)
12 * Optionally run an external shell script before each replayed commit
13 to give the ability to dynamically exclude or modify files as part
14 of the replay.
15
16 License: GPLv2, the same as hgsvn.
17 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
18 (This is a forked and modified version of http://code.google.com/p/svn2svn/)
19 """
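# Illustrative usage (hypothetical repository URLs; see main() below for the
# actual option parsing):
#
#   svn2svn.py -a file:///var/svn/source/trunk file:///var/svn/target/trunk
#
# This replays every revision of the source trunk into the target URL; per the
# notes above, -a (keep author) only works when the target repo is accessed
# via file://.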
20
21 import os
22 import sys
23 import time
24 import locale
25 import shutil
26 import select
27 import calendar
28 import traceback
29
30 from optparse import OptionParser
31 from subprocess import Popen, PIPE
32 from datetime import datetime
33 from operator import itemgetter
34
35 try:
36 from xml.etree import cElementTree as ET
37 except ImportError:
38 try:
39 from xml.etree import ElementTree as ET
40 except ImportError:
41 try:
42 import cElementTree as ET
43 except ImportError:
44 from elementtree import ElementTree as ET
45
46 svn_log_args = ['log', '--xml']
47 svn_info_args = ['info', '--xml']
48 svn_checkout_args = ['checkout', '-q']
49 svn_status_args = ['status', '--xml', '-v', '--ignore-externals']
50
51 # Setup debug options
52 debug = False
53 debug_runsvn_timing = False # Display how long each "svn" OS command took to run?
54 # Setup verbosity options
55 runsvn_showcmd = False # Display every "svn" OS command we run?
56 runsvn_showout = False # Display the stdout results from every "svn" OS command we run?
57 svnlog_verbose = True # Display each action + changed-path as we walk the history?
58
59 # define exception class
60 class ExternalCommandFailed(RuntimeError):
61 """
62 An external command failed.
63 """
64
65 def display_error(message, raise_exception = True):
66 """
67 Display error message, then terminate.
68 """
69 print "Error:", message
70 print
71 if raise_exception:
72 raise ExternalCommandFailed
73 else:
74 sys.exit(1)
75
76 # Windows compatibility code by Bill Baxter
77 if os.name == "nt":
78 def find_program(name):
79 """
80 Find the name of the program for Popen.
81     Windows is finicky about having the complete file name. Popen
82 won't search the %PATH% for you automatically.
83 (Adapted from ctypes.find_library)
84 """
85 # See MSDN for the REAL search order.
86 base, ext = os.path.splitext(name)
87 if ext:
88 exts = [ext]
89 else:
90 exts = ['.bat', '.exe']
91 for directory in os.environ['PATH'].split(os.pathsep):
92 for e in exts:
93 fname = os.path.join(directory, base + e)
94 if os.path.exists(fname):
95 return fname
96 return None
97 else:
98 def find_program(name):
99 """
100 Find the name of the program for Popen.
101 On Unix, popen isn't picky about having absolute paths.
102 """
103 return name
104
105 def shell_quote(s):
106 if os.name == "nt":
107 q = '"'
108 else:
109 q = "'"
110 return q + s.replace('\\', '\\\\').replace("'", "'\"'\"'") + q
111
112 locale_encoding = locale.getpreferredencoding()
113
114 def run_svn(args, fail_if_stderr=False, encoding="utf-8"):
115 """
116 Run svn cmd in PIPE
117 exit if svn cmd failed
118 """
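    # Example call (illustrative), using the argument lists defined above:
    #   run_svn(svn_info_args + ["file:///var/svn/source"])
    # returns the raw stdout of "svn info --xml file:///var/svn/source", or
    # raises ExternalCommandFailed (via display_error) if svn exits non-zero.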
119 def _transform_arg(a):
120 if isinstance(a, unicode):
121 a = a.encode(encoding or locale_encoding)
122 elif not isinstance(a, str):
123 a = str(a)
124 return a
125 t_args = map(_transform_arg, args)
126
127 cmd = find_program("svn")
128 cmd_string = str(" ".join(map(shell_quote, [cmd] + t_args)))
129 if runsvn_showcmd:
130 print "$", "("+os.getcwd()+")", cmd_string
131 if debug_runsvn_timing:
132 time1 = time.time()
133 pipe = Popen([cmd] + t_args, executable=cmd, stdout=PIPE, stderr=PIPE)
134 out, err = pipe.communicate()
135 if debug_runsvn_timing:
136 time2 = time.time()
137 print "(" + str(round(time2-time1,4)) + " elapsed)"
138 if out and runsvn_showout:
139 print out
140 if pipe.returncode != 0 or (fail_if_stderr and err.strip()):
141 display_error("External program failed (return code %d): %s\n%s"
142 % (pipe.returncode, cmd_string, err))
143 return out
144
145 def svn_date_to_timestamp(svn_date):
146 """
147 Parse an SVN date as read from the XML output and
148 return the corresponding timestamp.
149 """
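    # Illustrative example: svn_date_to_timestamp("2009-02-13T23:31:30.000000Z")
    # drops ".000000Z" and returns 1234567890 (seconds since the epoch, UTC).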
150 # Strip microseconds and timezone (always UTC, hopefully)
151 # XXX there are various ISO datetime parsing routines out there,
152 # cf. http://seehuhn.de/comp/pdate
153 date = svn_date.split('.', 2)[0]
154 time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
155 return calendar.timegm(time_tuple)
156
157 def parse_svn_info_xml(xml_string):
158 """
159 Parse the XML output from an "svn info" command and extract
160 useful information as a dict.
161 """
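    # The returned dict has this shape (values are illustrative):
    #   {'url': 'file:///var/svn/source/trunk', 'revision': 1234,
    #    'repos_url': 'file:///var/svn/source', 'last_changed_rev': 1230,
    #    'kind': 'dir'}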
162 d = {}
163 tree = ET.fromstring(xml_string)
164 entry = tree.find('.//entry')
165 if entry:
166 d['url'] = entry.find('url').text
167 d['revision'] = int(entry.get('revision'))
168 d['repos_url'] = tree.find('.//repository/root').text
169 d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
170 d['kind'] = entry.get('kind')
171 return d
172
173 def parse_svn_log_xml(xml_string):
174 """
175 Parse the XML output from an "svn log" command and extract
176 useful information as a list of dicts (one per log changeset).
177 """
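    # Each returned dict has this shape (values are illustrative):
    #   {'revision': 1234, 'author': 'jrandom', 'date': 1234567890,
    #    'message': 'Fix bug', 'changed_paths': [
    #       {'path': '/trunk/projectA/file1.txt', 'kind': 'file', 'action': 'M',
    #        'copyfrom_path': None, 'copyfrom_revision': None}]}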
178 l = []
179 tree = ET.fromstring(xml_string)
180 for entry in tree.findall('logentry'):
181 d = {}
182 d['revision'] = int(entry.get('revision'))
183 # Some revisions don't have authors, most notably
184 # the first revision in a repository.
185 author = entry.find('author')
186 d['author'] = author is not None and author.text or None
187 d['date'] = svn_date_to_timestamp(entry.find('date').text)
188 # Some revisions may have empty commit message
189 message = entry.find('msg')
190 message = message is not None and message.text is not None \
191 and message.text.strip() or ""
192 # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
193 d['message'] = message.replace('\r\n', '\n').replace('\n\r', '\n'). \
194 replace('\r', '\n')
195 paths = []
196 for path in entry.findall('.//path'):
197 copyfrom_rev = path.get('copyfrom-rev')
198 if copyfrom_rev:
199 copyfrom_rev = int(copyfrom_rev)
200 paths.append({
201 'path': path.text,
202 'kind': path.get('kind'),
203 'action': path.get('action'),
204 'copyfrom_path': path.get('copyfrom-path'),
205 'copyfrom_revision': copyfrom_rev,
206 })
207 # Need to sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
208 # can process actions in depth-first order.
209 d['changed_paths'] = sorted(paths, key=itemgetter('path'))
210 l.append(d)
211 return l
212
213 def parse_svn_status_xml(xml_string, base_dir=None):
214 """
215 Parse the XML output from an "svn status" command and extract
216 useful info as a list of dicts (one per status entry).
217 """
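    # Each returned dict has this shape (values are illustrative):
    #   {'path': 'projectA/file1.txt', 'type': 'normal'}
    # where 'type' is one of 'normal', 'external' or 'unversioned'.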
218 l = []
219 tree = ET.fromstring(xml_string)
220 for entry in tree.findall('.//entry'):
221 d = {}
222 path = entry.get('path')
223 if base_dir is not None:
224 assert path.startswith(base_dir)
225 path = path[len(base_dir):].lstrip('/\\')
226 d['path'] = path
227 wc_status = entry.find('wc-status')
228 if wc_status.get('item') == 'external':
229 d['type'] = 'external'
230 # TODO: Optionally check wc_status.get('item') == 'deleted' and return type='unversioned'?
231 elif wc_status.get('revision') is not None:
232 d['type'] = 'normal'
233 else:
234 d['type'] = 'unversioned'
235 l.append(d)
236 return l
237
238 def get_svn_info(svn_url_or_wc, rev_number=None):
239 """
240 Get SVN information for the given URL or working copy,
241 with an optionally specified revision number.
242 Returns a dict as created by parse_svn_info_xml().
243 """
244 if rev_number is not None:
245 args = [svn_url_or_wc + "@" + str(rev_number)]
246 else:
247 args = [svn_url_or_wc]
248 xml_string = run_svn(svn_info_args + args, fail_if_stderr=True)
249 return parse_svn_info_xml(xml_string)
250
251 def svn_checkout(svn_url, checkout_dir, rev_number=None):
252 """
253 Checkout the given URL at an optional revision number.
254 """
255 args = []
256 if rev_number is not None:
257 args += ['-r', rev_number]
258 args += [svn_url, checkout_dir]
259 return run_svn(svn_checkout_args + args)
260
261 def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True):
262 """
263 Fetch up to 'limit' SVN log entries between the given revisions.
264 """
265 if stop_on_copy:
266 args = ['--stop-on-copy']
267 else:
268 args = []
269 url = str(svn_url_or_wc)
270 if rev_start != 'HEAD' and rev_end != 'HEAD':
271 args += ['-r', '%s:%s' % (rev_start, rev_end)]
272 if not "@" in svn_url_or_wc:
273 url += "@" + str(max(rev_start, rev_end))
274 if get_changed_paths:
275 args += ['-v']
276 args += ['--limit', str(limit), url]
277 xml_string = run_svn(svn_log_args + args)
278 return parse_svn_log_xml(xml_string)
279
280 def get_svn_status(svn_wc, flags=None):
281 """
282 Get SVN status information about the given working copy.
283 """
284 # Ensure proper stripping by canonicalizing the path
285 svn_wc = os.path.abspath(svn_wc)
286 args = []
287 if flags:
288 args += [flags]
289 args += [svn_wc]
290 xml_string = run_svn(svn_status_args + args)
291 return parse_svn_status_xml(xml_string, svn_wc)
292
293 def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True):
294 """
295 Get the first SVN log entry in the requested revision range.
296 """
297 entries = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths)
298 if not entries:
299 display_error("No SVN log for %s between revisions %s and %s" %
300 (svn_url, rev_start, rev_end))
301
302 return entries[0]
303
304 def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
305 """
306 Get the first log entry after/at the given revision number in an SVN branch.
307 By default the revision number is set to 0, which will give you the log
308     entry corresponding to the branch creation.
309
310 NOTE: to know whether the branch creation corresponds to an SVN import or
311 a copy from another branch, inspect elements of the 'changed_paths' entry
312 in the returned dictionary.
313 """
314 return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True, get_changed_paths=True)
315
316 def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
317 """
318 Get the last log entry before/at the given revision number in an SVN branch.
319 By default the revision number is set to HEAD, which will give you the log
320 entry corresponding to the latest commit in branch.
321 """
322 return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True, get_changed_paths=True)
323
324
325 log_duration_threshold = 10.0
326 log_min_chunk_length = 10
327
328 def iter_svn_log_entries(svn_url, first_rev, last_rev):
329 """
330 Iterate over SVN log entries between first_rev and last_rev.
331
332 This function features chunked log fetching so that it isn't too nasty
333 to the SVN server if many entries are requested.
334 """
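    # Illustrative usage, mirroring the call in main():
    #   for log_entry in iter_svn_log_entries(source_url, svn_rev + 1, greatest_rev):
    #       ...  # each log_entry is a dict as built by parse_svn_log_xml()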
335 cur_rev = first_rev
336 chunk_length = log_min_chunk_length
337 chunk_interval_factor = 1.0
338 while last_rev == "HEAD" or cur_rev <= last_rev:
339 start_t = time.time()
340 stop_rev = min(last_rev, cur_rev + int(chunk_length * chunk_interval_factor))
341 entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length)
342 duration = time.time() - start_t
343 if not entries:
344 if stop_rev == last_rev:
345 break
346 cur_rev = stop_rev + 1
347 chunk_interval_factor *= 2.0
348 continue
349 for e in entries:
350 yield e
351 cur_rev = e['revision'] + 1
352 # Adapt chunk length based on measured request duration
353 if duration < log_duration_threshold:
354 chunk_length = int(chunk_length * 2.0)
355 elif duration > log_duration_threshold * 2:
356 chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
357
358 def commit_from_svn_log_entry(entry, files=None, keep_author=False):
359 """
360 Given an SVN log entry and an optional sequence of files, do an svn commit.
361 """
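    # The generated commit message looks like this (contents illustrative):
    #   <original source commit message>
    #   Date: 2009-02-13 15:31:30
    #   Author: jrandom
    # With keep_author=True the "Author:" line is omitted and the original
    # author is passed to "svn ci" via --username instead.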
362 # TODO: Run optional external shell hook here, for doing pre-commit filtering
363 # This will use the local timezone for displaying commit times
364 timestamp = int(entry['date'])
365 svn_date = str(datetime.fromtimestamp(timestamp))
366     # Uncomment this one if you prefer UTC commit times
367 #svn_date = "%d 0" % timestamp
368 if keep_author:
369 options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date, "--username", entry['author']]
370 else:
371 options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date + "\nAuthor: " + entry['author']]
372 if files:
373 options += list(files)
374 print "(Committing source rev #"+str(entry['revision'])+"...)"
375 run_svn(options)
376
377 def in_svn(p):
378 """
379 Check if a given file/folder is being tracked by Subversion.
380 Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
381 With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of the working-copy.
382 Use "svn status" to check the status of the file/folder.
383 """
384 # TODO: Is there a better way to do this?
385 entries = get_svn_status(p)
386 if not entries:
387 return False
388 d = entries[0]
389 return (d['type'] == 'normal')
390
391 def find_svn_ancestors(source_repos_url, source_base, source_offset, copyfrom_path, copyfrom_rev):
392 """
393 Given a copy-from path (copyfrom_path), walk the SVN history backwards to inspect
394     the ancestry of that path. Build a collection of copyfrom_path+revision pairs
395     for each of the branch-copies since the initial branch-creation. If we find a
396     copyfrom_path that source_base is a prefix of (e.g. we crawled back to
397     the initial branch-copy from trunk), then return the collection of ancestor paths.
398     Otherwise, copyfrom_path has no ancestry compared to source_base.
399
400 This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
401 file/folder was renamed in a branch and then that branch was merged back to trunk.
402
403 PARAMETERS:
404 * source_repos_url = Full URL to root of repository, e.g. 'file:///path/to/repos'
405 * source_base = e.g. '/trunk'
406 * source_offset = e.g. 'projectA/file1.txt'
407 * copyfrom_path = e.g. '/branches/bug123/projectA/file1.txt'
408 """
409
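    # Illustrative return value (hypothetical revisions) for a file whose branch
    # was originally copied from /trunk:
    #   [{'path': ['/branches/bug123', 'projectA/file1.txt'], 'revision': 1234},
    #    {'path': ['/trunk', 'projectA/file1.txt'], 'revision': 1200}]
    # Entries are collected most-recent-first as the history is walked backwards.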
410 done = False
411 working_path = copyfrom_path
412 working_base = copyfrom_path[:-len(source_offset)].rstrip('/')
413 working_offset = source_offset.strip('/')
414 working_rev = copyfrom_rev
415 ancestors = [{'path': [working_base, working_offset], 'revision': working_rev}]
416 while not done:
417 # Get the first "svn log" entry for this path (relative to @rev)
418 #working_path = working_base + "/" + working_offset
419 if debug:
420 print ">> find_svn_ancestors: " + source_repos_url + working_path + "@" + str(working_rev) + \
421 " (" + working_base + " " + working_offset + ")"
422 log_entry = get_first_svn_log_entry(source_repos_url + working_path + "@" + str(working_rev), 1, str(working_rev), True)
423 if not log_entry:
424 done = True
425 # Find the action for our working_path in this revision
426 for d in log_entry['changed_paths']:
427 path = d['path']
428 if not path in working_path:
429 continue
430 # Check action-type for this file
431 action = d['action']
432 if action not in 'MARD':
433 display_error("In SVN rev. %d: action '%s' not supported. \
434 Please report a bug!" % (log_entry['revision'], action))
435 if debug:
436 debug_desc = ": " + action + " " + path
437 if d['copyfrom_path']:
438 debug_desc += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
439 print debug_desc
440
441 if action == 'R':
442 # If file/folder was replaced, it has no ancestor
443 return []
444 if action == 'D':
445 # If file/folder was deleted, it has no ancestor
446 return []
447 if action == 'A':
448 # If file/folder was added but not a copy, it has no ancestor
449 if not d['copyfrom_path']:
450 return []
451 # Else, file/folder was added and is a copy, so check ancestors
452 path_old = d['copyfrom_path']
453 working_path = working_path.replace(path, path_old)
454 if working_base in working_path:
455 # If the new and old working_path share the same working_base, just need to update working_offset.
456 working_offset = working_path[len(working_base)+1:]
457 else:
458 # Else, assume that working_base has changed but working_offset is the same, e.g. a re-branch.
459 # TODO: Is this a safe assumption?!
460 working_base = working_path[:-len(working_offset)].rstrip('/')
461 working_rev = d['copyfrom_revision']
462 if debug:
463 print ">> find_svn_ancestors: copy-from: " + working_base + " " + working_offset + "@" + str(working_rev)
464 ancestors.append({'path': [working_base, working_offset], 'revision': working_rev})
465 # If we found a copy-from case which matches our source_base, we're done
466 if (path_old == source_base) or (path_old.startswith(source_base + "/")):
467 return ancestors
468 # Else, follow the copy and keep on searching
469 break
470 return None
471
472 def replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url):
473 """
474 Given an array of ancestor info (find_svn_ancestors), replay the history
475 to correctly track renames ("svn copy/move") across branch-merges.
476
477 For example, consider a sequence of events like this:
478 1. svn copy /trunk /branches/fix1
479 2. (Make some changes on /branches/fix1)
480     3. svn copy /branches/fix1/Proj1 /branches/fix1/Proj2  (rename folder)
481     4. svn copy /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt  (rename file inside the renamed folder)
482     5. svn co /trunk && svn merge /branches/fix1
483     After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
484     and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
485 to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
486 we'd lose the logical history that Proj2/file2.txt is really a descendant
487 of Proj1/file1.txt.
488
489 'source_repos_url' is the full URL to the root of the source repository.
490 'ancestors' is the array returned by find_svn_ancestors() with the final
491 destination info appended to it by process_svn_log_entry().
492     'source_url' and 'target_url' are as for process_svn_log_entry().
493 """
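    # For the example above, with hypothetical revision numbers, the reversed
    # ancestors array built by process_svn_log_entry() would look roughly like:
    #   [{'path': ['/trunk', 'Proj1'], 'revision': 9},
    #    {'path': ['/branches/fix1', 'Proj1'], 'revision': 11},
    #    {'path': ['/branches/fix1', 'Proj2'], 'revision': 14},
    #    {'path': ['/trunk', 'Proj2'], 'revision': 15}]
    # and the loop below replays the intermediate branch-side entries.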
494 # Ignore ancestors[0], which is the original (pre-branch-copy) trunk path
495 # Ignore ancestors[1], which is the original branch-creation commit
496 # Ignore ancestors[n], which is the final commit back to trunk
497 for idx in range(1, len(ancestors)-1):
498 ancestor = ancestors[idx]
499 source_base = ancestor['path'][0]
500 source_offset = ancestor['path'][1]
501 source_path = source_base + "/" + source_offset
502 source_rev = ancestor['revision']
503 source_rev_next = ancestors[idx+1]['revision']
504 # Do a "svn log" on the _parent_ directory of source_path, since trying to get log info
505 # for the "old path" on the revision where the copy/move happened will fail.
506 if "/" in source_path:
507 p_source_path = source_path[:source_path.rindex('/')]
508 else:
509 p_source_path = ""
510 if debug:
511 print ">> replay_svn_ancestors: ["+str(idx)+"]" + source_path+"@"+str(source_rev) + " ["+p_source_path+"@"+str(source_rev)+":"+str(source_rev_next-1)+"]"
512 it_log_entries = iter_svn_log_entries(source_repos_url+p_source_path, source_rev, source_rev_next-1)
513 for log_entry in it_log_entries:
514 #print ">> replay_svn_ancestors: log_entry: (" + source_repos_url+source_base + ")"
515 #print log_entry
516 # TODO: Hit a problem case with a rename-situation where the "remove" was committed ahead of the "add (copy)".
517 # Do we maybe need to buffer all the remove's until the end of the entire replay session?
518 # Or can we maybe work around this by passing an explicit rev # into "svn copy"?
519 process_svn_log_entry(log_entry, source_repos_url, source_repos_url+source_base, target_url)
520
521 def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url):
522 """
523 Process SVN changes from the given log entry.
524 Returns array of all the paths in the working-copy that were changed,
525 i.e. the paths which need to be "svn commit".
526
527 'log_entry' is the array structure built by parse_svn_log_xml().
528 'source_repos_url' is the full URL to the root of the source repository.
529 'source_url' is the full URL to the source path in the source repository.
530 'target_url' is the full URL to the target path in the target repository.
531 """
532 # Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
533 source_base = source_url[len(source_repos_url):]
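    # Illustrative example (hypothetical URLs): with
    #   source_repos_url = 'file:///var/svn/source'
    #   source_url       = 'file:///var/svn/source/branches/bug123'
    # source_base works out to '/branches/bug123'.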
534 if debug:
535 print ">> process_svn_log_entry: " + source_url + " (" + source_base + ")"
536
537 svn_rev = log_entry['revision']
538 # Get current target revision, for "svn copy" support
539 dup_info = get_svn_info(target_url)
540 dup_rev = dup_info['revision']
541
542 removed_paths = []
543 unrelated_paths = []
544 commit_paths = []
545
546 for d in log_entry['changed_paths']:
547 # Get the full path for this changed_path
548 # e.g. u'/branches/bug123/projectA/file1.txt'
549 path = d['path']
550 if not path.startswith(source_base + "/"):
551 # Ignore changed files that are not part of this subdir
552 if path != source_base:
553 print ">> process_svn_log_entry: Unrelated path: " + path + " (" + source_base + ")"
554 unrelated_paths.append(path)
555 continue
556 # Calculate the offset (based on source_base) for this changed_path
557 # e.g. u'projectA/file1.txt'
558 # (path = source_base + "/" + path_offset)
559 path_offset = path[len(source_base):].strip("/")
560 # Get the action for this path
561 action = d['action']
562 if action not in 'MARD':
563 display_error("In SVN rev. %d: action '%s' not supported. \
564 Please report a bug!" % (svn_rev, action))
565
566 # Try to be efficient and keep track of an explicit list of paths in the
567 # working copy that changed. If we commit from the root of the working copy,
568 # then SVN needs to crawl the entire working copy looking for pending changes.
569 # But, if we gather too many paths to commit, then we wipe commit_paths below
570 # and end-up doing a commit at the root of the working-copy.
571 if len (commit_paths) < 100:
572 commit_paths.append(path_offset)
573
574         # Special handling for 'replace' actions
575 is_replace = False
576 if action == 'R':
577 if svnlog_verbose:
578 msg = " " + action + " " + d['path']
579 if d['copyfrom_path']:
580 msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
581 print msg
582 # If file was "replaced" (deleted then re-added, all in same revision),
583 # then we need to run the "svn rm" first, then change action='A'. This
584 # lets the normal code below handle re-"svn add"'ing the files. This
585 # should replicate the "replace".
586 run_svn(["up", path_offset])
587 run_svn(["remove", "--force", path_offset])
588 action = 'A'
589 is_replace = True
590
591 # Handle all the various action-types
592 # (Handle "add" first, for "svn copy/move" support)
593 if action == 'A':
594 if svnlog_verbose:
595 msg = " " + action + " " + d['path']
596 if d['copyfrom_path']:
597 msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
598 print msg
599 # Determine where to export from
600 copyfrom_rev = svn_rev
601 copyfrom_path = path
602 svn_copy = False
603 # Handle cases where this "add" was a copy from another URL in the source repos
604 if d['copyfrom_revision']:
605 copyfrom_rev = d['copyfrom_revision']
606 copyfrom_path = d['copyfrom_path']
607 if debug:
608 print ">> process_svn_log_entry: copy-to: " + source_base + " " + path_offset
609 if source_base in copyfrom_path:
610 # If the copy-from path is inside the current working-copy, no need to check ancestry.
611 ancestors = []
612 copyfrom_path = copyfrom_path[len(source_base):].strip("/")
613 if debug:
614 print ">> process_svn_log_entry: Found copy: " + copyfrom_path+"@"+str(copyfrom_rev)
615 svn_copy = True
616 else:
617 ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset,
618 copyfrom_path, copyfrom_rev)
619 if ancestors:
620 # Reverse the list, so that we loop in chronological order
621 ancestors.reverse()
622 # Append the current revision
623 ancestors.append({'path': [source_base, path_offset], 'revision': svn_rev})
624 # ancestors[0] is the original (pre-branch-copy) trunk path.
625 # ancestors[1] is the first commit on the new branch.
626 copyfrom_rev = ancestors[0]['revision']
627 copyfrom_base = ancestors[0]['path'][0]
628 copyfrom_offset = ancestors[0]['path'][1]
629 copyfrom_path = copyfrom_base + copyfrom_offset
630 if debug:
631 print ">> process_svn_log_entry: FOUND PARENT:"
632 for idx in range(0,len(ancestors)):
633 ancestor = ancestors[idx]
634 print " ["+str(idx)+"] " + ancestor['path'][0]+" "+ancestor['path'][1]+"@"+str(ancestor['revision'])
635 #print ">> process_svn_log_entry: copyfrom_path (before): " + copyfrom_path + " source_base: " + source_base + " p: " + p
636 copyfrom_path = copyfrom_path[len(source_base):].strip("/")
637 #print ">> process_svn_log_entry: copyfrom_path (after): " + copyfrom_path
638 svn_copy = True
639 # If this add was a copy-from, do a smart replay of the ancestors' history.
640 if svn_copy:
641 if debug:
642 print ">> process_svn_log_entry: svn_copy: copy-from: " + copyfrom_path+"@"+str(copyfrom_rev) + " source_base: "+source_base + " len(ancestors): " + str(len(ancestors))
643 # If we don't have any ancestors, then this is just a straight "svn copy" in the current working-copy.
644 if not ancestors:
645 # ...but not if the target is already tracked, because this might run several times for the same path.
646                     # TODO: Is there a better way to avoid recursion bugs? Maybe a collection of processed paths?
647 # TODO: The "not in_svn" check creates problems for action="R" cases, e.g. r18834
648 if (not in_svn(path_offset)) or is_replace:
649 if os.path.exists(copyfrom_path):
650 # If the copyfrom_path exists in the working-copy, do a local copy
651 run_svn(["copy", copyfrom_path, path_offset])
652 else:
653 # TODO: This doesn't respect copyfrom_rev at all. Found a case where file was (accidentally?)
654                         # deleted in one commit and restored (added copy-from) in a later commit. Do we maybe
655 # need a mapping table of target_url -> source_url rev #'s, so that given a source_url
656 # copyfrom_rev, we can map that to the equiv target_url rev#, so we do the "svn copy"
657 # here correctly?
658 tmp_rev = dup_rev # Kludge for time-being
659 if copyfrom_path == 'Data/Databases/DBUpdate.mdb' and copyfrom_rev == 17568:
660 tmp_rev = dup_rev-10
661 run_svn(["copy", "-r", tmp_rev, target_url+"/"+copyfrom_path+"@"+str(tmp_rev), path_offset])
662 else:
663 if d['kind'] == 'dir':
664 # Replay any actions which happened to this folder from the ancestor path(s).
665 replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url)
666 else:
667 # Just do a straight "svn copy" for files. There isn't any kind of "dependent"
668 # history we might need to replay like for folders.
669 # TODO: Is this logic really correct? Doing a WC vs URL "svn copy" based on existence
670 # of *source* location seems a bit kludgy. Should there be a running list of
671 # renames during replay_svn_ancestors >> process_svn_log_entry?
672 if os.path.exists(copyfrom_path):
673 # If the copyfrom_path exists in the working-copy, do a local copy
674 run_svn(["copy", copyfrom_path, path_offset])
675 else:
676 # Else, could be a situation where replay_svn_ancestors() is replaying branch
677 # history and a copy was committed across two revisions: first the deletion
678 # followed by the later add. In such a case, we need to copy from HEAD (dup_rev)
679 # of the path in *target_url*
680 run_svn(["copy", "-r", dup_rev, target_url+"/"+copyfrom_path+"@"+str(dup_rev), path_offset])
681 # Else just copy/export the files from the source repo and "svn add" them.
682 else:
683 # Create (parent) directory if needed
684 if d['kind'] == 'dir':
685 p_path = path_offset
686 else:
687 p_path = os.path.dirname(path_offset).strip() or '.'
688 if not os.path.exists(p_path):
689 os.makedirs(p_path)
690 # Export the entire added tree.
691 run_svn(["export", "--force", "-r", str(copyfrom_rev),
692 source_repos_url + copyfrom_path + "@" + str(copyfrom_rev), path_offset])
693             # TODO: The "not in_svn" condition here is wrong for replace cases.
694             # Added the in_svn condition here originally since "svn export" is recursive
695             # but "svn log" will have an entry for each individual file, hence we run into a
696 # cannot-re-add-file-which-is-already-added issue.
697 if (not in_svn(path_offset)) or (is_replace):
698 run_svn(["add", "--parents", path_offset])
699 # TODO: Need to copy SVN properties from source repos
700
701 elif action == 'D':
702     # Queue "svn remove" commands, to give the action == 'A' handling the opportunity
703     # to do smart "svn copy" handling on copy/move/renames.
704 removed_paths.append(path_offset)
705
706 elif action == 'M':
707 if svnlog_verbose:
708 print " " + action + " " + d['path']
709 out = run_svn(["merge", "-c", str(svn_rev), "--non-recursive",
710 "--non-interactive", "--accept=theirs-full",
711 source_url+"/"+path_offset+"@"+str(svn_rev), path_offset])
712
713 else:
714         display_error("Internal Error: process_svn_log_entry: Unhandled 'action' value: '" + action + "'")
715
716 if removed_paths:
717 for path_offset in removed_paths:
718 if svnlog_verbose:
719 print " D " + source_base+"/"+path_offset
720 run_svn(["remove", "--force", path_offset])
721
722 if unrelated_paths:
723 print "Unrelated paths: (vs. '" + source_base + "')"
724 print "*", unrelated_paths
725
726 return commit_paths
727
728 def pull_svn_rev(log_entry, source_repos_url, source_url, target_url, keep_author=False):
729 """
730 Pull SVN changes from the given log entry.
731 Returns the new SVN revision.
732 If an exception occurs, it will rollback to revision 'svn_rev - 1'.
733 """
734 ## Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
735 #source_base = source_url[len(source_repos_url):]
736
737 svn_rev = log_entry['revision']
738 print "\n(Starting source rev #"+str(svn_rev)+":)"
739 print "r"+str(log_entry['revision']) + " | " + \
740 log_entry['author'] + " | " + \
741 str(datetime.fromtimestamp(int(log_entry['date'])).isoformat(' '))
742 print log_entry['message']
743 print "------------------------------------------------------------------------"
744 commit_paths = process_svn_log_entry(log_entry, source_repos_url, source_url, target_url)
745
746 # If we had too many individual paths to commit, wipe the list and just commit at
747 # the root of the working copy.
748 if len (commit_paths) > 99:
749 commit_paths = []
750
751 # TODO: Use SVN properties to track source URL + rev in the target repo?
752 # This would provide a more reliable resume-support
753 try:
754 commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
755 except ExternalCommandFailed:
756 # try to ignore the Properties conflicts on files and dirs
757 # use the copy from original_wc
758 # TODO: Need to re-work this?
759 #has_Conflict = False
760 #for d in log_entry['changed_paths']:
761 # p = d['path']
762 # p = p[len(source_base):].strip("/")
763 # if os.path.isfile(p):
764 # if os.path.isfile(p + ".prej"):
765 # has_Conflict = True
766 # shutil.copy(original_wc + os.sep + p, p)
767 # p2=os.sep + p.replace('_', '__').replace('/', '_') \
768 # + ".prej-" + str(svn_rev)
769 # shutil.move(p + ".prej", os.path.dirname(original_wc) + p2)
770 # w="\n### Properties conflicts ignored:"
771 # print "%s %s, in revision: %s\n" % (w, p, svn_rev)
772 # elif os.path.isdir(p):
773 # if os.path.isfile(p + os.sep + "dir_conflicts.prej"):
774 # has_Conflict = True
775 # p2=os.sep + p.replace('_', '__').replace('/', '_') \
776 # + "_dir__conflicts.prej-" + str(svn_rev)
777 # shutil.move(p + os.sep + "dir_conflicts.prej",
778 # os.path.dirname(original_wc) + p2)
779 # w="\n### Properties conflicts ignored:"
780 # print "%s %s, in revision: %s\n" % (w, p, svn_rev)
781 # out = run_svn(["propget", "svn:ignore",
782 # original_wc + os.sep + p])
783 # if out:
784 # run_svn(["propset", "svn:ignore", out.strip(), p])
785 #            out = run_svn(["propget", "svn:externals",
786 #                original_wc + os.sep + p])
787 #            if out:
788 #                run_svn(["propset", "svn:externals", out.strip(), p])
789 ## try again
790 #if has_Conflict:
791 # commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
792 #else:
793 raise ExternalCommandFailed
794 print "(Finished source rev #"+str(svn_rev)+")"
795
796
797 def main():
798 usage = "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
799 parser = OptionParser(usage)
800 parser.add_option("-a", "--keep-author", action="store_true",
801 dest="keep_author", help="Keep revision Author or not")
802 parser.add_option("-c", "--continue-from-break", action="store_true",
803 dest="cont_from_break",
804 help="Continue from previous break")
805 parser.add_option("-r", "--svn-rev", type="int", dest="svn_rev",
806 help="SVN revision to checkout from")
807 (options, args) = parser.parse_args()
808 if len(args) != 2:
809 display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
810 False)
811
812 source_url = args.pop(0).rstrip("/")
813 target_url = args.pop(0).rstrip("/")
814 if options.keep_author:
815 keep_author = True
816 else:
817 keep_author = False
818
819 # Find the greatest_rev in the source repo
820 svn_info = get_svn_info(source_url)
821 greatest_rev = svn_info['revision']
822
823 dup_wc = "_dup_wc"
824
825 # if old working copy does not exist, disable continue mode
826 # TODO: Better continue support. Maybe include source repo's rev # in target commit info?
827 if not os.path.exists(dup_wc):
828 options.cont_from_break = False
829
830 if not options.cont_from_break:
831         # Warn if the target SVN URL already exists
832 cmd = find_program("svn")
833 pipe = Popen([cmd] + ["list"] + [target_url], executable=cmd,
834 stdout=PIPE, stderr=PIPE)
835 out, err = pipe.communicate()
836 if pipe.returncode == 0:
837             print "Target SVN URL: %s already exists!" % target_url
838 if out:
839 print out
840 print "Press 'Enter' to Continue, 'Ctrl + C' to Cancel..."
841 print "(Timeout in 5 seconds)"
842 rfds, wfds, efds = select.select([sys.stdin], [], [], 5)
843
844 # Get log entry for the SVN revision we will check out
845 if options.svn_rev:
846         # If a rev was specified, get the log entry just before or at that rev
847 svn_start_log = get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
848 else:
849 # Otherwise, get log entry of branch creation
850 # TODO: This call is *very* expensive on a repo with lots of revisions.
851 # Even though the call is passing --limit 1, it seems like that limit-filter
852 # is happening after SVN has fetched the full log history.
853 svn_start_log = get_first_svn_log_entry(source_url, 1, greatest_rev, False)
854
855 # This is the revision we will start from for source_url
856 svn_rev = svn_start_log['revision']
857
858 # Check out a working copy of target_url
859 dup_wc = os.path.abspath(dup_wc)
860 if os.path.exists(dup_wc):
861 shutil.rmtree(dup_wc)
862 svn_checkout(target_url, dup_wc)
863 os.chdir(dup_wc)
864
865 # For the initial commit to the target URL, export all the contents from
866 # the source URL at the start-revision.
867 paths = run_svn(["list", "-r", str(svn_rev), source_url+"@"+str(svn_rev)])
868 paths = paths.strip("\n").split("\n")
869 for path in paths:
870 if not path:
871 # Skip null lines
872 break
873 # Directories have a trailing slash in the "svn list" output
874 if path[-1] == "/":
875 path=path.rstrip('/')
876 if not os.path.exists(path):
877 os.makedirs(path)
878 run_svn(["export", "--force", "-r" , str(svn_rev), source_url+"/"+path+"@"+str(svn_rev), path])
879 run_svn(["add", path])
880 commit_from_svn_log_entry(svn_start_log, [], keep_author)
881 else:
882 dup_wc = os.path.abspath(dup_wc)
883 os.chdir(dup_wc)
884         # TODO: Need better resume support. For the time being, expect the caller to explicitly pass in the resume revision.
885 svn_rev = options.svn_rev
886 if svn_rev < 1:
887             display_error("Invalid arguments\n\nNeed to pass resume rev # (-r) when using continue-mode (-c)", False)
888
889
890 # Get SVN info
891 svn_info = get_svn_info(source_url)
892 # Get the base URL for the source repos, e.g. u'svn://svn.example.com/svn/repo'
893 source_repos_url = svn_info['repos_url']
894
895 # Load SVN log starting from svn_rev + 1
896 it_log_entries = iter_svn_log_entries(source_url, svn_rev + 1, greatest_rev)
897
898 try:
899 for log_entry in it_log_entries:
900 # Replay this revision from source_url into target_url
901 pull_svn_rev(log_entry, source_repos_url, source_url, target_url, keep_author)
902 # Update our target working-copy, to ensure everything says it's at the new HEAD revision
903 run_svn(["up", dup_wc])
904
905 except KeyboardInterrupt:
906 print "\nStopped by user."
907 run_svn(["cleanup"])
908 run_svn(["revert", "--recursive", "."])
909 except:
910 print "\nCommand failed with following error:\n"
911 traceback.print_exc()
912 run_svn(["cleanup"])
913 run_svn(["revert", "--recursive", "."])
914 finally:
915 run_svn(["up"])
916 print "\nFinished!"
917
918
919 if __name__ == "__main__":
920 main()
921
922 # vim:sts=4:sw=4: