1 #!/usr/bin/env python
2 """
3 svn2svn.py
4
5 Replicate (replay) changesets from one SVN repository to another:
6 * Maintains full logical history (e.g. uses "svn copy" for renames).
7 * Maintains original commit messages.
8 * Cannot maintain original commit dates, but appends the original commit date
9 to each commit message: "Date: %d".
10 * Optionally maintains source author info. (Only supported if accessing
11 target SVN repo via file://)
12 * Optionally runs an external shell script before each replayed commit
13 to give the ability to dynamically exclude or modify files as part
14 of the replay.
15
16 License: GPLv2, the same as hgsvn.
17 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
18 (This is a forked and modified version of http://code.google.com/p/svn2svn/)
19 """
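# Example invocation (a minimal sketch with hypothetical repository URLs; see
# main() below for the actual option handling):
#
#   svn2svn.py -a file:///path/to/source-repo/trunk file:///path/to/target-repo/trunk
#
# This would replay every revision under the source trunk into the target URL,
# keeping the original author on each commit (-a).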
20
21 import os
22 import sys
23 import time
24 import locale
25 import shutil
26 import select
27 import calendar
28 import traceback
29
30 from optparse import OptionParser
31 from subprocess import Popen, PIPE
32 from datetime import datetime
33 from operator import itemgetter
34
35 try:
36 from xml.etree import cElementTree as ET
37 except ImportError:
38 try:
39 from xml.etree import ElementTree as ET
40 except ImportError:
41 try:
42 import cElementTree as ET
43 except ImportError:
44 from elementtree import ElementTree as ET
45
46 svn_log_args = ['log', '--xml']
47 svn_info_args = ['info', '--xml']
48 svn_checkout_args = ['checkout', '-q']
49 svn_status_args = ['status', '--xml', '-v', '--ignore-externals']
50
51 # Setup debug options
52 debug = False
53 debug_runsvn_timing = False # Display how long each "svn" OS command took to run?
54 # Setup verbosity options
55 runsvn_showcmd = False # Display every "svn" OS command we run?
56 runsvn_showout = False # Display the stdout results from every "svn" OS command we run?
57 svnlog_verbose = True # Display each action + changed-path as we walk the history?
58
59 # define exception class
60 class ExternalCommandFailed(RuntimeError):
61 """
62 An external command failed.
63 """
64
65 def display_error(message, raise_exception = True):
66 """
67 Display error message, then terminate.
68 """
69 print "Error:", message
70 print
71 if raise_exception:
72 raise ExternalCommandFailed
73 else:
74 sys.exit(1)
75
76 # Windows compatibility code by Bill Baxter
77 if os.name == "nt":
78 def find_program(name):
79 """
80 Find the name of the program for Popen.
81 Windows is finicky about having the complete file name. Popen
82 won't search the %PATH% for you automatically.
83 (Adapted from ctypes.find_library)
84 """
85 # See MSDN for the REAL search order.
86 base, ext = os.path.splitext(name)
87 if ext:
88 exts = [ext]
89 else:
90 exts = ['.bat', '.exe']
91 for directory in os.environ['PATH'].split(os.pathsep):
92 for e in exts:
93 fname = os.path.join(directory, base + e)
94 if os.path.exists(fname):
95 return fname
96 return None
97 else:
98 def find_program(name):
99 """
100 Find the name of the program for Popen.
101 On Unix, Popen isn't picky about having absolute paths.
102 """
103 return name
104
105 def shell_quote(s):
106 if os.name == "nt":
107 q = '"'
108 else:
109 q = "'"
110 return q + s.replace('\\', '\\\\').replace("'", "'\"'\"'") + q
111
112 locale_encoding = locale.getpreferredencoding()
113
114 def run_svn(args, fail_if_stderr=False, encoding="utf-8"):
115 """
116 Run svn cmd in PIPE
117 exit if svn cmd failed
118 """
119 def _transform_arg(a):
120 if isinstance(a, unicode):
121 a = a.encode(encoding or locale_encoding)
122 elif not isinstance(a, str):
123 a = str(a)
124 return a
125 t_args = map(_transform_arg, args)
126
127 cmd = find_program("svn")
128 cmd_string = str(" ".join(map(shell_quote, [cmd] + t_args)))
129 if runsvn_showcmd:
130 print "$", "("+os.getcwd()+")", cmd_string
131 if debug_runsvn_timing:
132 time1 = time.time()
133 pipe = Popen([cmd] + t_args, executable=cmd, stdout=PIPE, stderr=PIPE)
134 out, err = pipe.communicate()
135 if debug_runsvn_timing:
136 time2 = time.time()
137 print "(" + str(round(time2-time1,4)) + " elapsed)"
138 if out and runsvn_showout:
139 print out
140 if pipe.returncode != 0 or (fail_if_stderr and err.strip()):
141 display_error("External program failed (return code %d): %s\n%s"
142 % (pipe.returncode, cmd_string, err))
143 return out
144
145 def svn_date_to_timestamp(svn_date):
146 """
147 Parse an SVN date as read from the XML output and
148 return the corresponding timestamp.
149 """
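# For example (made-up value), an XML date such as "2011-02-19T13:32:14.806800Z"
# is truncated to "2011-02-19T13:32:14" and converted to the matching UTC epoch timestamp.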
150 # Strip microseconds and timezone (always UTC, hopefully)
151 # XXX there are various ISO datetime parsing routines out there,
152 # cf. http://seehuhn.de/comp/pdate
153 date = svn_date.split('.', 2)[0]
154 time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
155 return calendar.timegm(time_tuple)
156
157 def parse_svn_info_xml(xml_string):
158 """
159 Parse the XML output from an "svn info" command and extract
160 useful information as a dict.
161 """
162 d = {}
163 tree = ET.fromstring(xml_string)
164 entry = tree.find('.//entry')
165 if entry:
166 d['url'] = entry.find('url').text
167 d['revision'] = int(entry.get('revision'))
168 d['repos_url'] = tree.find('.//repository/root').text
169 d['repos_uuid'] = tree.find('.//repository/uuid').text
170 d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
171 d['kind'] = entry.get('kind')
172 return d
173
174 def parse_svn_log_xml(xml_string):
175 """
176 Parse the XML output from an "svn log" command and extract
177 useful information as a list of dicts (one per log changeset).
178 """
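# Each returned dict looks roughly like this (illustrative values only):
#   {'revision': 123, 'author': 'alice', 'date': 1298122334, 'message': '...',
#    'changed_paths': [{'path': '/trunk/projectA/file1.txt', 'kind': 'file',
#                       'action': 'A', 'copyfrom_path': None, 'copyfrom_revision': None}]}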
179 l = []
180 tree = ET.fromstring(xml_string)
181 for entry in tree.findall('logentry'):
182 d = {}
183 d['revision'] = int(entry.get('revision'))
184 # Some revisions don't have authors, most notably
185 # the first revision in a repository.
186 author = entry.find('author')
187 d['author'] = author is not None and author.text or None
188 d['date'] = svn_date_to_timestamp(entry.find('date').text)
189 # Some revisions may have empty commit message
190 message = entry.find('msg')
191 message = message is not None and message.text is not None \
192 and message.text.strip() or ""
193 # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
194 d['message'] = message.replace('\r\n', '\n').replace('\n\r', '\n'). \
195 replace('\r', '\n')
196 paths = []
197 for path in entry.findall('.//path'):
198 copyfrom_rev = path.get('copyfrom-rev')
199 if copyfrom_rev:
200 copyfrom_rev = int(copyfrom_rev)
201 paths.append({
202 'path': path.text,
203 'kind': path.get('kind'),
204 'action': path.get('action'),
205 'copyfrom_path': path.get('copyfrom-path'),
206 'copyfrom_revision': copyfrom_rev,
207 })
208 # Need to sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
209 # can process actions in depth-first order.
210 d['changed_paths'] = sorted(paths, key=itemgetter('path'))
211 l.append(d)
212 return l
213
214 def parse_svn_status_xml(xml_string, base_dir=None):
215 """
216 Parse the XML output from an "svn status" command and extract
217 useful info as a list of dicts (one per status entry).
218 """
219 l = []
220 tree = ET.fromstring(xml_string)
221 for entry in tree.findall('.//entry'):
222 d = {}
223 path = entry.get('path')
224 if base_dir is not None:
225 assert path.startswith(base_dir)
226 path = path[len(base_dir):].lstrip('/\\')
227 d['path'] = path
228 wc_status = entry.find('wc-status')
229 if wc_status.get('item') == 'external':
230 d['type'] = 'external'
231 # TODO: Optionally check wc_status.get('item') == 'deleted' and return type='unversioned'?
232 elif wc_status.get('revision') is not None:
233 d['type'] = 'normal'
234 else:
235 d['type'] = 'unversioned'
236 l.append(d)
237 return l
238
239 def get_svn_info(svn_url_or_wc, rev_number=None):
240 """
241 Get SVN information for the given URL or working copy,
242 with an optionally specified revision number.
243 Returns a dict as created by parse_svn_info_xml().
244 """
245 if rev_number is not None:
246 args = [svn_url_or_wc + "@" + str(rev_number)]
247 else:
248 args = [svn_url_or_wc]
249 xml_string = run_svn(svn_info_args + args, fail_if_stderr=True)
250 return parse_svn_info_xml(xml_string)
251
252 def svn_checkout(svn_url, checkout_dir, rev_number=None):
253 """
254 Checkout the given URL at an optional revision number.
255 """
256 args = []
257 if rev_number is not None:
258 args += ['-r', rev_number]
259 args += [svn_url, checkout_dir]
260 return run_svn(svn_checkout_args + args)
261
262 def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True):
263 """
264 Fetch up to 'limit' SVN log entries between the given revisions.
265 """
266 if stop_on_copy:
267 args = ['--stop-on-copy']
268 else:
269 args = []
270 url = str(svn_url_or_wc)
271 if rev_start != 'HEAD' and rev_end != 'HEAD':
272 args += ['-r', '%s:%s' % (rev_start, rev_end)]
273 if "@" not in svn_url_or_wc:
274 url += "@" + str(max(rev_start, rev_end))
275 if get_changed_paths:
276 args += ['-v']
277 args += ['--limit', str(limit), url]
278 xml_string = run_svn(svn_log_args + args)
279 return parse_svn_log_xml(xml_string)
280
281 def get_svn_status(svn_wc, flags=None):
282 """
283 Get SVN status information about the given working copy.
284 """
285 # Ensure proper stripping by canonicalizing the path
286 svn_wc = os.path.abspath(svn_wc)
287 args = []
288 if flags:
289 args += [flags]
290 args += [svn_wc]
291 xml_string = run_svn(svn_status_args + args)
292 return parse_svn_status_xml(xml_string, svn_wc)
293
294 def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True):
295 """
296 Get the first SVN log entry in the requested revision range.
297 """
298 entries = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths)
299 if not entries:
300 display_error("No SVN log for %s between revisions %s and %s" %
301 (svn_url, rev_start, rev_end))
302
303 return entries[0]
304
305 def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
306 """
307 Get the first log entry after/at the given revision number in an SVN branch.
308 By default the revision number is set to 0, which will give you the log
309 entry corresponding to the branch creation.
310
311 NOTE: to know whether the branch creation corresponds to an SVN import or
312 a copy from another branch, inspect elements of the 'changed_paths' entry
313 in the returned dictionary.
314 """
315 return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True, get_changed_paths=True)
316
317 def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
318 """
319 Get the last log entry before/at the given revision number in an SVN branch.
320 By default the revision number is set to HEAD, which will give you the log
321 entry corresponding to the latest commit in branch.
322 """
323 return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True, get_changed_paths=True)
324
325
326 log_duration_threshold = 10.0
327 log_min_chunk_length = 10
328
329 def iter_svn_log_entries(svn_url, first_rev, last_rev):
330 """
331 Iterate over SVN log entries between first_rev and last_rev.
332
333 This function features chunked log fetching so that it isn't too nasty
334 to the SVN server if many entries are requested.
335 """
336 cur_rev = first_rev
337 chunk_length = log_min_chunk_length
338 chunk_interval_factor = 1.0
339 while last_rev == "HEAD" or cur_rev <= last_rev:
340 start_t = time.time()
341 stop_rev = min(last_rev, cur_rev + int(chunk_length * chunk_interval_factor))
342 entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length)
343 duration = time.time() - start_t
344 if not entries:
345 if stop_rev == last_rev:
346 break
347 cur_rev = stop_rev + 1
348 chunk_interval_factor *= 2.0
349 continue
350 for e in entries:
351 yield e
352 cur_rev = e['revision'] + 1
353 # Adapt chunk length based on measured request duration
354 if duration < log_duration_threshold:
355 chunk_length = int(chunk_length * 2.0)
356 elif duration > log_duration_threshold * 2:
357 chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
358
359 def commit_from_svn_log_entry(entry, files=None, keep_author=False):
360 """
361 Given an SVN log entry and an optional sequence of files, do an svn commit.
362 """
363 # TODO: Run optional external shell hook here, for doing pre-commit filtering
364 # This will use the local timezone for displaying commit times
365 timestamp = int(entry['date'])
366 svn_date = str(datetime.fromtimestamp(timestamp))
367 # Uncomment this line if you prefer UTC commit times
368 #svn_date = "%d 0" % timestamp
369 if keep_author:
370 options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date, "--username", entry['author']]
371 else:
372 options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date + "\nAuthor: " + entry['author']]
373 if files:
374 options += list(files)
375 print "(Committing source rev #"+str(entry['revision'])+"...)"
376 run_svn(options)
377
378 def in_svn(p):
379 """
380 Check if a given file/folder is being tracked by Subversion.
381 Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
382 With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of the working-copy.
383 Use "svn status" to check the status of the file/folder.
384 """
385 # TODO: Is there a better way to do this?
386 entries = get_svn_status(p)
387 if not entries:
388 return False
389 d = entries[0]
390 return (d['type'] == 'normal')
391
392 def find_svn_ancestors(source_repos_url, source_base, source_offset, copyfrom_path, copyfrom_rev):
393 """
394 Given a copy-from path (copyfrom_path), walk the SVN history backwards to inspect
395 the ancestry of that path. Build a collection of copyfrom_path+revision pairs
396 for each of the branch-copies since the initial branch-creation. If we find a
397 copyfrom_path which starts with source_base (e.g. we crawled back to
398 the initial branch-copy from trunk), then return the collection of ancestor paths.
399 Otherwise, copyfrom_path has no ancestry relative to source_base.
400
401 This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
402 file/folder was renamed in a branch and then that branch was merged back to trunk.
403
404 PARAMETERS:
405 * source_repos_url = Full URL to root of repository, e.g. 'file:///path/to/repos'
406 * source_base = e.g. '/trunk'
407 * source_offset = e.g. 'projectA/file1.txt'
408 * copyfrom_path = e.g. '/branches/bug123/projectA/file1.txt'
409 """
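# The returned 'ancestors' value is a list of {'path': [base, offset], 'revision': rev}
# dicts (newest copy-from first); process_svn_log_entry() later reverses it into
# chronological order before replaying. Shape shown for illustration only.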
410
411 done = False
412 working_path = copyfrom_path
413 working_base = copyfrom_path[:-len(source_offset)].rstrip('/')
414 working_offset = source_offset.strip('/')
415 working_rev = copyfrom_rev
416 ancestors = [{'path': [working_base, working_offset], 'revision': working_rev}]
417 while not done:
418 # Get the first "svn log" entry for this path (relative to @rev)
419 #working_path = working_base + "/" + working_offset
420 if debug:
421 print ">> find_svn_ancestors: " + source_repos_url + working_path + "@" + str(working_rev) + \
422 " (" + working_base + " " + working_offset + ")"
423 log_entry = get_first_svn_log_entry(source_repos_url + working_path + "@" + str(working_rev), 1, str(working_rev), True)
424 if not log_entry:
425 done = True
426 # Find the action for our working_path in this revision
427 for d in log_entry['changed_paths']:
428 path = d['path']
429 if path not in working_path:
430 continue
431 # Check action-type for this file
432 action = d['action']
433 if action not in 'MARD':
434 display_error("In SVN rev. %d: action '%s' not supported. \
435 Please report a bug!" % (log_entry['revision'], action))
436 if debug:
437 debug_desc = ": " + action + " " + path
438 if d['copyfrom_path']:
439 debug_desc += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
440 print debug_desc
441
442 if action == 'R':
443 # If file/folder was replaced, it has no ancestor
444 return []
445 if action == 'D':
446 # If file/folder was deleted, it has no ancestor
447 return []
448 if action == 'A':
449 # If file/folder was added but not a copy, it has no ancestor
450 if not d['copyfrom_path']:
451 return []
452 # Else, file/folder was added and is a copy, so check ancestors
453 path_old = d['copyfrom_path']
454 working_path = working_path.replace(path, path_old)
455 if working_base in working_path:
456 # If the new and old working_path share the same working_base, just need to update working_offset.
457 working_offset = working_path[len(working_base)+1:]
458 else:
459 # Else, assume that working_base has changed but working_offset is the same, e.g. a re-branch.
460 # TODO: Is this a safe assumption?!
461 working_base = working_path[:-len(working_offset)].rstrip('/')
462 working_rev = d['copyfrom_revision']
463 if debug:
464 print ">> find_svn_ancestors: copy-from: " + working_base + " " + working_offset + "@" + str(working_rev)
465 ancestors.append({'path': [working_base, working_offset], 'revision': working_rev})
466 # If we found a copy-from case which matches our source_base, we're done
467 if (path_old == source_base) or (path_old.startswith(source_base + "/")):
468 return ancestors
469 # Else, follow the copy and keep on searching
470 break
471 return None
472
473 def replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url):
474 """
475 Given an array of ancestor info (find_svn_ancestors), replay the history
476 to correctly track renames ("svn copy/move") across branch-merges.
477
478 For example, consider a sequence of events like this:
479 1. svn copy /trunk /branches/fix1
480 2. (Make some changes on /branches/fix1)
481 3. svn copy /branches/fix1/Proj1 /branches/fix1/Proj2   # Rename folder
482 4. svn copy /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt   # Rename file inside renamed folder
483 5. svn co /trunk && svn merge /branches/fix1
484 After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
485 and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
486 to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
487 we'd lose the logical history that Proj2/file2.txt is really a descendant
488 of Proj1/file1.txt.
489
490 'source_repos_url' is the full URL to the root of the source repository.
491 'ancestors' is the array returned by find_svn_ancestors() with the final
492 destination info appended to it by process_svn_log_entry().
493 'source_url' and 'target_url' are the full URLs to the source/target paths in the source/target repositories.
494 """
495 # Ignore ancestors[0], which is the original (pre-branch-copy) trunk path
496 # Ignore ancestors[1], which is the original branch-creation commit
497 # Ignore ancestors[n], which is the final commit back to trunk
498 for idx in range(1, len(ancestors)-1):
499 ancestor = ancestors[idx]
500 source_base = ancestor['path'][0]
501 source_offset = ancestor['path'][1]
502 source_path = source_base + "/" + source_offset
503 source_rev = ancestor['revision']
504 source_rev_next = ancestors[idx+1]['revision']
505 # Do a "svn log" on the _parent_ directory of source_path, since trying to get log info
506 # for the "old path" on the revision where the copy/move happened will fail.
507 if "/" in source_path:
508 p_source_path = source_path[:source_path.rindex('/')]
509 else:
510 p_source_path = ""
511 if debug:
512 print ">> replay_svn_ancestors: ["+str(idx)+"]" + source_path+"@"+str(source_rev) + " ["+p_source_path+"@"+str(source_rev)+":"+str(source_rev_next-1)+"]"
513 it_log_entries = iter_svn_log_entries(source_repos_url+p_source_path, source_rev, source_rev_next-1)
514 for log_entry in it_log_entries:
515 #print ">> replay_svn_ancestors: log_entry: (" + source_repos_url+source_base + ")"
516 #print log_entry
517 # TODO: Hit a problem case with a rename-situation where the "remove" was committed ahead of the "add (copy)".
518 # Do we maybe need to buffer all the remove's until the end of the entire replay session?
519 # Or can we maybe work around this by passing an explicit rev # into "svn copy"?
520 process_svn_log_entry(log_entry, source_repos_url, source_repos_url+source_base, target_url)
521
522 def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url):
523 """
524 Process SVN changes from the given log entry.
525 Returns array of all the paths in the working-copy that were changed,
526 i.e. the paths which need to be "svn commit".
527
528 'log_entry' is one entry (dict) from the list built by parse_svn_log_xml().
529 'source_repos_url' is the full URL to the root of the source repository.
530 'source_url' is the full URL to the source path in the source repository.
531 'target_url' is the full URL to the target path in the target repository.
532 """
533 # Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
534 source_base = source_url[len(source_repos_url):]
535 if debug:
536 print ">> process_svn_log_entry: " + source_url + " (" + source_base + ")"
537
538 svn_rev = log_entry['revision']
539 # Get current target revision, for "svn copy" support
540 dup_info = get_svn_info(target_url)
541 dup_rev = dup_info['revision']
542
543 removed_paths = []
544 unrelated_paths = []
545 commit_paths = []
546
547 for d in log_entry['changed_paths']:
548 # Get the full path for this changed_path
549 # e.g. u'/branches/bug123/projectA/file1.txt'
550 path = d['path']
551 if not path.startswith(source_base + "/"):
552 # Ignore changed files that are not part of this subdir
553 if path != source_base:
554 print ">> process_svn_log_entry: Unrelated path: " + path + " (" + source_base + ")"
555 unrelated_paths.append(path)
556 continue
557 # Calculate the offset (based on source_base) for this changed_path
558 # e.g. u'projectA/file1.txt'
559 # (path = source_base + "/" + path_offset)
560 path_offset = path[len(source_base):].strip("/")
561 # Get the action for this path
562 action = d['action']
563 if action not in 'MARD':
564 display_error("In SVN rev. %d: action '%s' not supported. \
565 Please report a bug!" % (svn_rev, action))
566
567 # Try to be efficient and keep track of an explicit list of paths in the
568 # working copy that changed. If we commit from the root of the working copy,
569 # then SVN needs to crawl the entire working copy looking for pending changes.
570 # But, if we gather too many paths to commit, then we wipe commit_paths below
571 # and end-up doing a commit at the root of the working-copy.
572 if len (commit_paths) < 100:
573 commit_paths.append(path_offset)
574
575 # Special-handling for replace's
576 is_replace = False
577 if action == 'R':
578 if svnlog_verbose:
579 msg = " " + action + " " + d['path']
580 if d['copyfrom_path']:
581 msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
582 print msg
583 # If file was "replaced" (deleted then re-added, all in same revision),
584 # then we need to run the "svn rm" first, then change action='A'. This
585 # lets the normal code below handle re-"svn add"'ing the files. This
586 # should replicate the "replace".
587 run_svn(["up", path_offset])
588 run_svn(["remove", "--force", path_offset])
589 action = 'A'
590 is_replace = True
591
592 # Handle all the various action-types
593 # (Handle "add" first, for "svn copy/move" support)
594 if action == 'A':
595 if svnlog_verbose:
596 msg = " " + action + " " + d['path']
597 if d['copyfrom_path']:
598 msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
599 print msg
600 # Determine where to export from
601 copyfrom_rev = svn_rev
602 copyfrom_path = path
603 svn_copy = False
604 # Handle cases where this "add" was a copy from another URL in the source repos
605 if d['copyfrom_revision']:
606 copyfrom_rev = d['copyfrom_revision']
607 copyfrom_path = d['copyfrom_path']
608 if debug:
609 print ">> process_svn_log_entry: copy-to: " + source_base + " " + path_offset
610 if source_base in copyfrom_path:
611 # If the copy-from path is inside the current working-copy, no need to check ancestry.
612 ancestors = []
613 copyfrom_path = copyfrom_path[len(source_base):].strip("/")
614 if debug:
615 print ">> process_svn_log_entry: Found copy: " + copyfrom_path+"@"+str(copyfrom_rev)
616 svn_copy = True
617 else:
618 ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset,
619 copyfrom_path, copyfrom_rev)
620 if ancestors:
621 # Reverse the list, so that we loop in chronological order
622 ancestors.reverse()
623 # Append the current revision
624 ancestors.append({'path': [source_base, path_offset], 'revision': svn_rev})
625 # ancestors[0] is the original (pre-branch-copy) trunk path.
626 # ancestors[1] is the first commit on the new branch.
627 copyfrom_rev = ancestors[0]['revision']
628 copyfrom_base = ancestors[0]['path'][0]
629 copyfrom_offset = ancestors[0]['path'][1]
630 copyfrom_path = copyfrom_base + copyfrom_offset
631 if debug:
632 print ">> process_svn_log_entry: FOUND PARENT:"
633 for idx in range(0,len(ancestors)):
634 ancestor = ancestors[idx]
635 print " ["+str(idx)+"] " + ancestor['path'][0]+" "+ancestor['path'][1]+"@"+str(ancestor['revision'])
636 #print ">> process_svn_log_entry: copyfrom_path (before): " + copyfrom_path + " source_base: " + source_base + " p: " + p
637 copyfrom_path = copyfrom_path[len(source_base):].strip("/")
638 #print ">> process_svn_log_entry: copyfrom_path (after): " + copyfrom_path
639 svn_copy = True
640 # If this add was a copy-from, do a smart replay of the ancestors' history.
641 if svn_copy:
642 if debug:
643 print ">> process_svn_log_entry: svn_copy: copy-from: " + copyfrom_path+"@"+str(copyfrom_rev) + " source_base: "+source_base + " len(ancestors): " + str(len(ancestors))
644 # If we don't have any ancestors, then this is just a straight "svn copy" in the current working-copy.
645 if not ancestors:
646 # ...but not if the target is already tracked, because this might run several times for the same path.
647 # TODO: Is there a better way to avoid recursion bugs? Maybe a collection of processed paths?
648 # TODO: The "not in_svn" check creates problems for action="R" cases, e.g. r18834
649 if (not in_svn(path_offset)) or is_replace:
650 if os.path.exists(copyfrom_path):
651 # If the copyfrom_path exists in the working-copy, do a local copy
652 run_svn(["copy", copyfrom_path, path_offset])
653 else:
654 # TODO: This doesn't respect copyfrom_rev at all. Found a case where file was (accidentally?)
655 # deleted in one commit and restored (added copy-from) in a later commit. Do we maybe
656 # need a mapping table of target_url -> source_url rev #'s, so that given a source_url
657 # copyfrom_rev, we can map that to the equiv target_url rev#, so we do the "svn copy"
658 # here correctly?
659 tmp_rev = dup_rev # Kludge for time-being
660 if copyfrom_path == 'Data/Databases/DBUpdate.mdb' and copyfrom_rev == 17568:
661 tmp_rev = dup_rev-10
662 run_svn(["copy", "-r", tmp_rev, target_url+"/"+copyfrom_path+"@"+str(tmp_rev), path_offset])
663 else:
664 if d['kind'] == 'dir':
665 # Replay any actions which happened to this folder from the ancestor path(s).
666 replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url)
667 else:
668 # Just do a straight "svn copy" for files. There isn't any kind of "dependent"
669 # history we might need to replay like for folders.
670 # TODO: Is this logic really correct? Doing a WC vs URL "svn copy" based on existence
671 # of *source* location seems a bit kludgy. Should there be a running list of
672 # renames during replay_svn_ancestors >> process_svn_log_entry?
673 if os.path.exists(copyfrom_path):
674 # If the copyfrom_path exists in the working-copy, do a local copy
675 run_svn(["copy", copyfrom_path, path_offset])
676 else:
677 # Else, could be a situation where replay_svn_ancestors() is replaying branch
678 # history and a copy was committed across two revisions: first the deletion
679 # followed by the later add. In such a case, we need to copy from HEAD (dup_rev)
680 # of the path in *target_url*
681 run_svn(["copy", "-r", dup_rev, target_url+"/"+copyfrom_path+"@"+str(dup_rev), path_offset])
682 # Else just copy/export the files from the source repo and "svn add" them.
683 else:
684 # Create (parent) directory if needed
685 if d['kind'] == 'dir':
686 p_path = path_offset
687 else:
688 p_path = os.path.dirname(path_offset).strip() or '.'
689 if not os.path.exists(p_path):
690 os.makedirs(p_path)
691 # Export the entire added tree.
692 run_svn(["export", "--force", "-r", str(copyfrom_rev),
693 source_repos_url + copyfrom_path + "@" + str(copyfrom_rev), path_offset])
694 # TODO: The "not in_svn" condition here is wrong for replace cases.
695 # Added the in_svn condition here originally since "svn export" is recursive
696 # but "svn log" will have an entry for each indiv file, hence we run into a
697 # cannot-re-add-file-which-is-already-added issue.
698 if (not in_svn(path_offset)) or (is_replace):
699 run_svn(["add", "--parents", path_offset])
700 # TODO: Need to copy SVN properties from source repos
701
702 elif action == 'D':
703 # Queue "svn remove" commands, to allow the action == 'A' handling the opportunity
704 # to do smart "svn copy" handling on copy/move/renames.
705 removed_paths.append(path_offset)
706
707 elif action == 'M':
708 if svnlog_verbose:
709 print " " + action + " " + d['path']
710 out = run_svn(["merge", "-c", str(svn_rev), "--non-recursive",
711 "--non-interactive", "--accept=theirs-full",
712 source_url+"/"+path_offset+"@"+str(svn_rev), path_offset])
713
714 else:
715 display_error("Internal Error: process_svn_log_entry: Unhandled 'action' value: '" + action + "'")
716
717 if removed_paths:
718 for path_offset in removed_paths:
719 if svnlog_verbose:
720 print " D " + source_base+"/"+path_offset
721 run_svn(["remove", "--force", path_offset])
722
723 if unrelated_paths:
724 print "Unrelated paths: (vs. '" + source_base + "')"
725 print "*", unrelated_paths
726
727 return commit_paths
728
729 def pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url, target_url, keep_author=False):
730 """
731 Pull SVN changes from the given log entry.
732 Returns the new SVN revision.
733 If an exception occurs, it will rollback to revision 'svn_rev - 1'.
734 """
735 ## Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
736 #source_base = source_url[len(source_repos_url):]
737
738 svn_rev = log_entry['revision']
739 print "\n(Starting source rev #"+str(svn_rev)+":)"
740 print "r"+str(log_entry['revision']) + " | " + \
741 log_entry['author'] + " | " + \
742 str(datetime.fromtimestamp(int(log_entry['date'])).isoformat(' '))
743 print log_entry['message']
744 print "------------------------------------------------------------------------"
745 commit_paths = process_svn_log_entry(log_entry, source_repos_url, source_url, target_url)
746
747 # If we had too many individual paths to commit, wipe the list and just commit at
748 # the root of the working copy.
749 if len (commit_paths) > 99:
750 commit_paths = []
751
752 # TODO: Use SVN properties to track source URL + rev in the target repo?
753 # This would provide a more reliable resume-support
754 try:
755 commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
756 except ExternalCommandFailed:
757 # try to ignore the Properties conflicts on files and dirs
758 # use the copy from original_wc
759 # TODO: Need to re-work this?
760 #has_Conflict = False
761 #for d in log_entry['changed_paths']:
762 # p = d['path']
763 # p = p[len(source_base):].strip("/")
764 # if os.path.isfile(p):
765 # if os.path.isfile(p + ".prej"):
766 # has_Conflict = True
767 # shutil.copy(original_wc + os.sep + p, p)
768 # p2=os.sep + p.replace('_', '__').replace('/', '_') \
769 # + ".prej-" + str(svn_rev)
770 # shutil.move(p + ".prej", os.path.dirname(original_wc) + p2)
771 # w="\n### Properties conflicts ignored:"
772 # print "%s %s, in revision: %s\n" % (w, p, svn_rev)
773 # elif os.path.isdir(p):
774 # if os.path.isfile(p + os.sep + "dir_conflicts.prej"):
775 # has_Conflict = True
776 # p2=os.sep + p.replace('_', '__').replace('/', '_') \
777 # + "_dir__conflicts.prej-" + str(svn_rev)
778 # shutil.move(p + os.sep + "dir_conflicts.prej",
779 # os.path.dirname(original_wc) + p2)
780 # w="\n### Properties conflicts ignored:"
781 # print "%s %s, in revision: %s\n" % (w, p, svn_rev)
782 # out = run_svn(["propget", "svn:ignore",
783 # original_wc + os.sep + p])
784 # if out:
785 # run_svn(["propset", "svn:ignore", out.strip(), p])
786 # out = run_svn(["propget", "svn:externel",
787 # original_wc + os.sep + p])
788 # if out:
789 # run_svn(["propset", "svn:external", out.strip(), p])
790 ## try again
791 #if has_Conflict:
792 # commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
793 #else:
794 raise ExternalCommandFailed
795
796 # Add source-tracking revprops
797 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_uuid", source_repos_uuid])
798 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_url", source_repos_url])
799 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_rev", svn_rev])
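# (These revprops could later be read back from the target repo, e.g. with
#  "svn propget svn2svn:source_rev --revprop -r HEAD <target_url>", to build the
#  more reliable resume support mentioned in the TODO above.)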
800 print "(Finished source rev #"+str(svn_rev)+")"
801
802
803 def main():
804 usage = "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
805 parser = OptionParser(usage)
806 parser.add_option("-a", "--keep-author", action="store_true",
807 dest="keep_author", help="Keep revision Author or not")
808 parser.add_option("-c", "--continue-from-break", action="store_true",
809 dest="cont_from_break",
810 help="Continue from previous break")
811 parser.add_option("-r", "--svn-rev", type="int", dest="svn_rev",
812 help="SVN revision to checkout from")
813 (options, args) = parser.parse_args()
814 if len(args) != 2:
815 display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
816 False)
817
818 source_url = args.pop(0).rstrip("/")
819 target_url = args.pop(0).rstrip("/")
820 if options.keep_author:
821 keep_author = True
822 else:
823 keep_author = False
824
825 # Find the greatest_rev in the source repo
826 svn_info = get_svn_info(source_url)
827 greatest_rev = svn_info['revision']
828 # Get the base URL for the source repos, e.g. u'svn://svn.example.com/svn/repo'
829 source_repos_url = svn_info['repos_url']
830 # Get the UUID for the source repos
831 source_repos_uuid = svn_info['repos_uuid']
832
833 dup_wc = "_dup_wc"
834
835 # if old working copy does not exist, disable continue mode
836 # TODO: Better continue support. Maybe include source repo's rev # in target commit info?
837 if not os.path.exists(dup_wc):
838 options.cont_from_break = False
839
840 if not options.cont_from_break:
841 # Warn if the target SVN URL already exists
842 cmd = find_program("svn")
843 pipe = Popen([cmd] + ["list"] + [target_url], executable=cmd,
844 stdout=PIPE, stderr=PIPE)
845 out, err = pipe.communicate()
846 if pipe.returncode == 0:
847 print "Target SVN URL: %s already exists!" % target_url
848 if out:
849 print out
850 print "Press 'Enter' to Continue, 'Ctrl + C' to Cancel..."
851 print "(Timeout in 5 seconds)"
852 rfds, wfds, efds = select.select([sys.stdin], [], [], 5)
853
854 # Get log entry for the SVN revision we will check out
855 if options.svn_rev:
856 # If a rev was specified, get the log entry just before or at that rev
857 svn_start_log = get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
858 else:
859 # Otherwise, get log entry of branch creation
860 # TODO: This call is *very* expensive on a repo with lots of revisions.
861 # Even though the call is passing --limit 1, it seems like that limit-filter
862 # is happening after SVN has fetched the full log history.
863 svn_start_log = get_first_svn_log_entry(source_url, 1, greatest_rev, False)
864
865 # This is the revision we will start from for source_url
866 svn_rev = svn_start_log['revision']
867
868 # Check out a working copy of target_url
869 dup_wc = os.path.abspath(dup_wc)
870 if os.path.exists(dup_wc):
871 shutil.rmtree(dup_wc)
872 svn_checkout(target_url, dup_wc)
873 os.chdir(dup_wc)
874
875 # For the initial commit to the target URL, export all the contents from
876 # the source URL at the start-revision.
877 paths = run_svn(["list", "-r", str(svn_rev), source_url+"@"+str(svn_rev)])
878 paths = paths.strip("\n").split("\n")
879 for path in paths:
880 if not path:
881 # Skip null lines
882 continue
883 # Directories have a trailing slash in the "svn list" output
884 if path[-1] == "/":
885 path=path.rstrip('/')
886 if not os.path.exists(path):
887 os.makedirs(path)
888 run_svn(["export", "--force", "-r" , str(svn_rev), source_url+"/"+path+"@"+str(svn_rev), path])
889 run_svn(["add", path])
890 commit_from_svn_log_entry(svn_start_log, [], keep_author)
891 # Add source-tracking revprops
892 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_uuid", source_repos_uuid])
893 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_url", source_repos_url])
894 run_svn(["propset", "--revprop", "-r", "HEAD", "svn2svn:source_rev", svn_rev])
895 else:
896 dup_wc = os.path.abspath(dup_wc)
897 os.chdir(dup_wc)
898 # TODO: Need better resume support. For the time being, expect caller explicitly passes in resume revision.
899 svn_rev = options.svn_rev
900 if svn_rev < 1:
901 display_error("Invalid arguments\n\nNeed to pass resume rev # (-r) when using continue-mode (-c)", False)
902
903 # Load SVN log starting from svn_rev + 1
904 it_log_entries = iter_svn_log_entries(source_url, svn_rev + 1, greatest_rev)
905
906 try:
907 for log_entry in it_log_entries:
908 # Replay this revision from source_url into target_url
909 pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url, target_url, keep_author)
910 # Update our target working-copy, to ensure everything says it's at the new HEAD revision
911 run_svn(["up", dup_wc])
912
913 except KeyboardInterrupt:
914 print "\nStopped by user."
915 run_svn(["cleanup"])
916 run_svn(["revert", "--recursive", "."])
917 except:
918 print "\nCommand failed with following error:\n"
919 traceback.print_exc()
920 run_svn(["cleanup"])
921 run_svn(["revert", "--recursive", "."])
922 finally:
923 run_svn(["up"])
924 print "\nFinished!"
925
926
927 if __name__ == "__main__":
928 main()
929
930 # vim:sts=4:sw=4: