]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn.py
More changes for find_svn_ancestors() support
[svn2svn.git] / svn2svn.py
1 #!/usr/bin/env python
2 """
3 svn2svn.py
4
5 Replicate (replay) changesets from one SVN repository to another:
6 * Maintains full logical history (e.g. uses "svn copy" for renames).
7 * Maintains original commit messages.
8 * Cannot maintain original commit date, but appends original commit date
9 for each commit message: "Date: %d".
10 * Optionally maintain source author info. (Only supported if accessing
11 target SVN repo via file://)
12 * Optionally run an external shell script before each replayed commit
13 to give the ability to dynamically exclude or modify files as part
14 of the replay.
15
16 License: GPLv2, the same as hgsvn.
17 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
18 (This is a forked and modified version of http://code.google.com/p/svn2svn/)
19 """
20
21 import os
22 import sys
23 import time
24 import locale
25 import shutil
26 import select
27 import calendar
28 import traceback
29
30 from optparse import OptionParser
31 from subprocess import Popen, PIPE
32 from datetime import datetime
33
34 try:
35 from xml.etree import cElementTree as ET
36 except ImportError:
37 try:
38 from xml.etree import ElementTree as ET
39 except ImportError:
40 try:
41 import cElementTree as ET
42 except ImportError:
43 from elementtree import ElementTree as ET
44
# Leading arguments for each "svn" sub-command used in this file. Callers
# build the final argument list by concatenating their own arguments onto
# these lists before handing the result to run_svn().
svn_log_args = ['log', '--xml', '-v']
svn_info_args = ['info', '--xml']
svn_checkout_args = ['checkout', '-q']
svn_status_args = ['status', '--xml', '-v', '--ignore-externals']

# Debug switches
debug = True
debug_runsvn_timing = False  # Report how long each "svn" OS command took?
# Verbosity switches
runsvn_verbose = True  # Echo every "svn" OS command before running it?
svnlog_verbose = True  # Echo each action + changed-path while walking history?
56
# Exception type used throughout this script to signal a failed step.
class ExternalCommandFailed(RuntimeError):
    """
    Raised when an external command (e.g. "svn") did not succeed.
    """
62
def display_error(message, raise_exception = True):
    """
    Print an error message, then abort the current operation.

    'message' is the text to show after the "Error:" prefix.
    'raise_exception' selects how to abort: when true (the default) an
    ExternalCommandFailed carrying 'message' is raised so callers may
    catch it; when false the process exits with status 1.
    """
    print("Error: %s" % (message,))
    print("")
    if raise_exception:
        # Attach the message so that tracebacks show why the step failed.
        raise ExternalCommandFailed(message)
    sys.exit(1)
73
# Windows compatibility code by Bill Baxter
def find_program(name):
    """
    Return the program name to hand to Popen for 'name'.

    On Windows (os.name == "nt") Popen needs the complete file name, so
    each directory in %PATH% is probed for 'name' -- using its own
    extension if it has one, otherwise ".bat" then ".exe" -- and the first
    existing file is returned, or None when nothing matches.
    (Adapted from ctypes.find_library; see MSDN for the REAL search order.)

    On every other platform 'name' is returned unchanged, since Popen is
    not picky about absolute paths there.
    """
    if os.name != "nt":
        return name
    stem, suffix = os.path.splitext(name)
    if suffix:
        candidates = [suffix]
    else:
        candidates = ['.bat', '.exe']
    for folder in os.environ['PATH'].split(os.pathsep):
        for candidate in candidates:
            full_name = os.path.join(folder, stem + candidate)
            if os.path.exists(full_name):
                return full_name
    return None
102
def shell_quote(s):
    """
    Quote 's' so it can be shown as one argument of a shell command line.

    On Windows the string is wrapped in double quotes and embedded double
    quotes are backslash-escaped. Elsewhere it is wrapped in single quotes;
    an embedded single quote is written as '"'"' (close the quotes, emit a
    double-quoted single quote, re-open the quotes). Backslashes are left
    alone because they are literal inside POSIX single quotes.
    """
    if os.name == "nt":
        return '"' + s.replace('"', '\\"') + '"'
    return "'" + s.replace("'", "'\"'\"'") + "'"
109
# Fallback encoding for unicode arguments when run_svn() is called with a
# false 'encoding' value.
locale_encoding = locale.getpreferredencoding()


def run_svn(args, fail_if_stderr=False, encoding="utf-8"):
    """
    Run "svn" with the given arguments and return what it wrote to stdout.

    'args' is the list of command-line arguments (without the program
    name); unicode items are encoded with 'encoding' (or the locale's
    preferred encoding when 'encoding' is false) and other non-string
    items are converted with str().
    'fail_if_stderr' additionally treats any non-blank stderr output as a
    failure.

    On failure (non-zero return code, stderr output when requested, or no
    "svn" program found) the error is reported through display_error(),
    which raises ExternalCommandFailed.
    """
    def _transform_arg(a):
        if isinstance(a, unicode):
            a = a.encode(encoding or locale_encoding)
        elif not isinstance(a, str):
            a = str(a)
        return a
    t_args = [_transform_arg(a) for a in args]

    cmd = find_program("svn")
    if cmd is None:
        # find_program() returns None on Windows when nothing on PATH matches;
        # fail with a clear message instead of an obscure error further down.
        display_error("Unable to locate the 'svn' program on PATH")
    cmd_string = " ".join([shell_quote(part) for part in [cmd] + t_args])
    if runsvn_verbose:
        print("$ " + cmd_string)
    if debug_runsvn_timing:
        time1 = time.time()
    pipe = Popen([cmd] + t_args, executable=cmd, stdout=PIPE, stderr=PIPE)
    out, err = pipe.communicate()
    if debug_runsvn_timing:
        time2 = time.time()
        print("(" + str(round(time2-time1,4)) + " elapsed)")
    if pipe.returncode != 0 or (fail_if_stderr and err.strip()):
        display_error("External program failed (return code %d): %s\n%s"
            % (pipe.returncode, cmd_string, err))
    return out
140
def svn_date_to_timestamp(svn_date):
    """
    Convert an SVN date string (as found in the XML output) into the
    corresponding timestamp (seconds since the epoch).
    """
    # Everything from the first '.' onwards (microseconds and the timezone
    # suffix) is discarded; the remainder is interpreted as UTC.
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    whole_seconds = svn_date.partition('.')[0]
    parsed = time.strptime(whole_seconds, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(parsed)
152
def parse_svn_info_xml(xml_string):
    """
    Parse the XML output from an "svn info" command and extract
    useful information as a dict.

    The returned dict has the keys 'url', 'revision', 'repos_url',
    'last_changed_rev' and 'kind'. It is empty when the XML contains no
    <entry> element, or only an <entry> without child elements.
    """
    d = {}
    tree = ET.fromstring(xml_string)
    entry = tree.find('.//entry')
    # Test for "found and has children" explicitly instead of relying on the
    # truth value of an Element, which ElementTree has deprecated.
    if entry is not None and len(entry) > 0:
        d['url'] = entry.find('url').text
        d['revision'] = int(entry.get('revision'))
        d['repos_url'] = tree.find('.//repository/root').text
        d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
        d['kind'] = entry.get('kind')
    return d
168
def parse_svn_log_xml(xml_string):
    """
    Parse the XML output from an "svn log" command and extract
    useful information as a list of dicts (one per log changeset).

    Each dict has the keys 'revision' (int), 'author' (string or None),
    'date' (timestamp), 'message' (string with unix line endings) and
    'changed_paths', a list of dicts with the keys 'path', 'kind',
    'action', 'copyfrom_path' and 'copyfrom_revision', sorted by 'path'.
    """
    l = []
    tree = ET.fromstring(xml_string)
    for entry in tree.findall('logentry'):
        d = {}
        d['revision'] = int(entry.get('revision'))
        # Some revisions don't have authors, most notably
        # the first revision in a repository.
        author = entry.find('author')
        if author is not None and author.text:
            d['author'] = author.text
        else:
            d['author'] = None
        d['date'] = svn_date_to_timestamp(entry.find('date').text)
        # Some revisions may have empty commit message
        message = entry.find('msg')
        if message is not None and message.text is not None:
            message = message.text.strip()
        else:
            message = ""
        # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
        d['message'] = message.replace('\r\n', '\n').replace('\n\r', '\n'). \
            replace('\r', '\n')
        paths = d['changed_paths'] = []
        for path in entry.findall('.//path'):
            copyfrom_rev = path.get('copyfrom-rev')
            if copyfrom_rev:
                copyfrom_rev = int(copyfrom_rev)
            paths.append({
                'path': path.text,
                'kind': path.get('kind'),
                'action': path.get('action'),
                'copyfrom_path': path.get('copyfrom-path'),
                'copyfrom_revision': copyfrom_rev,
            })
        # Need to sort paths (i.e. into hierarchical order), so that
        # process_svn_log_entry() can process actions in depth-first order.
        # Sort on the path itself: comparing the whole dicts does not order
        # by path (and is not supported at all on Python 3).
        paths.sort(key=lambda changed: changed['path'])
        l.append(d)
    return l
208
def parse_svn_status_xml(xml_string, base_dir=None):
    """
    Turn the XML output of an "svn status" command into a list of dicts,
    one per status entry, each with the keys 'path' and 'type'.

    When 'base_dir' is given, every entry path must start with it; that
    prefix and any leading path separators are removed from 'path'.
    'type' is 'external', 'normal' (the entry's wc-status has a revision)
    or 'unversioned'.
    """
    results = []
    root = ET.fromstring(xml_string)
    for node in root.findall('.//entry'):
        rel_path = node.get('path')
        if base_dir is not None:
            assert rel_path.startswith(base_dir)
            rel_path = rel_path[len(base_dir):].lstrip('/\\')
        status = node.find('wc-status')
        if status.get('item') == 'external':
            entry_type = 'external'
        elif status.get('revision') is not None:
            entry_type = 'normal'
        else:
            entry_type = 'unversioned'
        results.append({'path': rel_path, 'type': entry_type})
    return results
232
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Run "svn info" on a URL or working copy and return the dict built by
    parse_svn_info_xml().

    When 'rev_number' is given it is appended to the target as "@<rev>".
    Any stderr output from svn is treated as a failure.
    """
    target = svn_url_or_wc
    if rev_number is not None:
        target = svn_url_or_wc + "@" + str(rev_number)
    xml_string = run_svn(svn_info_args + [target], fail_if_stderr=True)
    return parse_svn_info_xml(xml_string)
246
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Check out 'svn_url' into 'checkout_dir', at revision 'rev_number' when
    one is given. Returns the output of the "svn checkout" command.
    """
    if rev_number is None:
        revision_args = []
    else:
        revision_args = ['-r', rev_number]
    return run_svn(svn_checkout_args + revision_args + [svn_url, checkout_dir])
256
def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False):
    """
    Fetch up to 'limit' SVN log entries between the given revisions.

    'rev_start' and 'rev_end' become the "-r start:end" range unless either
    one is 'HEAD', in which case no range is passed. 'stop_on_copy' adds
    "--stop-on-copy". Returns the list built by parse_svn_log_xml().
    """
    args = []
    if stop_on_copy:
        args.append('--stop-on-copy')
    url = str(svn_url_or_wc)
    if rev_start != 'HEAD' and rev_end != 'HEAD':
        args += ['-r', '%s:%s' % (rev_start, rev_end)]
        if "@" not in svn_url_or_wc:
            # Peg the URL at the newest revision of the range. Callers such as
            # get_last_svn_log_entry() pass the range newest-first, so pegging
            # at 'rev_end' would look the path up in the oldest revision.
            try:
                peg_rev = max(int(rev_start), int(rev_end))
            except (TypeError, ValueError):
                # Not plain revision numbers; keep the previous behaviour.
                peg_rev = rev_end
            url += "@" + str(peg_rev)
    args += ['--limit', str(limit), url]
    xml_string = run_svn(svn_log_args + args)
    return parse_svn_log_xml(xml_string)
273
def get_svn_status(svn_wc, flags=None):
    """
    Run "svn status" on the given working-copy path and return the list
    built by parse_svn_status_xml().

    'flags', when given, is passed to svn as one extra argument ahead of
    the path.
    """
    # Canonicalize the path so that parse_svn_status_xml() can strip it
    # from the front of every reported entry path.
    svn_wc = os.path.abspath(svn_wc)
    extra_args = [svn_wc]
    if flags:
        extra_args = [flags] + extra_args
    xml_string = run_svn(svn_status_args + extra_args)
    return parse_svn_status_xml(xml_string, svn_wc)
286
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False):
    """
    Return the first SVN log entry of the requested revision range.

    Reports an error through display_error() when the range has no entries.
    """
    found = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy)
    if not found:
        display_error("No SVN log for %s between revisions %s and %s" %
                      (svn_url, rev_start, rev_end))

    return found[0]
297
def get_first_svn_log_entry(svn_url, rev_start, rev_end):
    """
    Return the first log entry at/after 'rev_start' (and no later than
    'rev_end') in an SVN branch, without following copies.

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True)
309
def get_last_svn_log_entry(svn_url, rev_start, rev_end):
    """
    Return the last log entry at/before 'rev_end' (and no earlier than
    'rev_start') in an SVN branch, without following copies.

    The range is handed to "svn log" newest-first, so the single entry
    fetched is the most recent one in the range.
    """
    return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True)
317
318
# Tuning values for iter_svn_log_entries(): the request duration (seconds)
# the chunk size is adapted against, and the smallest chunk size to use.
log_duration_threshold = 10.0
log_min_chunk_length = 10

def iter_svn_log_entries(svn_url, first_rev, last_rev):
    """
    Generate the SVN log entries between first_rev and last_rev, in order.

    Entries are fetched in chunks so that a large request is not too nasty
    to the SVN server. The chunk size doubles after a fast request and
    halves (down to log_min_chunk_length) after a slow one; the revision
    window doubles whenever a window turns out to be empty.
    """
    next_rev = first_rev
    batch_size = log_min_chunk_length
    window_factor = 1.0
    while last_rev == "HEAD" or next_rev <= last_rev:
        started = time.time()
        stop_rev = min(last_rev, next_rev + int(batch_size * window_factor))
        entries = run_svn_log(svn_url, next_rev, stop_rev, batch_size)
        elapsed = time.time() - started
        if entries:
            for entry in entries:
                yield entry
                next_rev = entry['revision'] + 1
            # Adapt chunk length based on measured request duration
            if elapsed < log_duration_threshold:
                batch_size = int(batch_size * 2.0)
            elif elapsed > log_duration_threshold * 2:
                batch_size = max(log_min_chunk_length, int(batch_size / 2.0))
        elif stop_rev == last_rev:
            # Nothing found and the window already reached the end.
            break
        else:
            # Empty window: skip past it and look further ahead next time.
            next_rev = stop_rev + 1
            window_factor *= 2.0
351
def commit_from_svn_log_entry(entry, files=None, keep_author=False):
    """
    Given an SVN log entry and an optional sequence of files, do an svn commit.

    The commit message is the entry's message followed by a "Date:" line.
    With 'keep_author' the entry's author is passed as "--username";
    otherwise it is appended to the message as an "Author:" line. An entry
    without an author (None) gets neither.
    """
    # This will use the local timezone for displaying commit times
    timestamp = int(entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    # For UTC commit times, use this instead: svn_date = "%d 0" % timestamp
    author = entry['author']
    message = entry['message'] + "\nDate: " + svn_date
    if author and not keep_author:
        message += "\nAuthor: " + author
    options = ["ci", "--force-log", "-m", message]
    if author and keep_author:
        options += ["--username", author]
    if files:
        options += list(files)
    run_svn(options)
    print("")
369
def in_svn(p):
    """
    Tell whether the given file/folder is being tracked by Subversion.

    Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn"
    directories. With SVN 1.7 and beyond, WC-NG means only a single top-level
    ".svn" at the root of the working-copy, so "svn status" is asked instead:
    the path counts as tracked when its first status entry is 'normal'.
    """
    # TODO: Is there a better way to do this?
    statuses = get_svn_status(p)
    return bool(statuses) and statuses[0]['type'] == 'normal'
383
def find_svn_ancestors(source_repos_url, source_base, source_offset, copyfrom_path, copyfrom_rev):
    """
    Given a copy-from path (copyfrom_path), walk the SVN history backwards to
    inspect the ancestry of that path. Build a collection of
    copyfrom_path+revision pairs for each of the branch-copies since the
    initial branch-creation. If we find a copy-from path which is source_base
    or lies below it (e.g. we crawled back to the initial branch-copy from
    trunk), then return the collection of ancestor paths. Otherwise,
    copyfrom_path has no ancestry compared to source_base.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases
    where a file/folder was renamed in a branch and then that branch was
    merged back to trunk.

    PARAMETERS:
    * source_repos_url = Full URL to root of repository, e.g. 'file:///path/to/repos'
    * source_base = e.g. '/trunk'
    * source_offset = e.g. 'projectA/file1.txt'
    * copyfrom_path = e.g. '/branches/bug123/projectA/file1.txt'
    * copyfrom_rev = revision at which copyfrom_path is looked up

    RETURNS:
    A list of {'path': [base, offset], 'revision': rev} dicts, newest first,
    when an ancestor under source_base was found; an empty list when the
    path was replaced, deleted, or added without a copy-from; None when the
    walk could not be continued.
    """
    working_path = copyfrom_path
    if source_offset:
        working_base = copyfrom_path[:-len(source_offset)].rstrip('/')
    else:
        # A "[:-0]" slice would yield an empty string, not the whole path.
        working_base = copyfrom_path.rstrip('/')
    working_offset = source_offset.strip('/')
    working_rev = copyfrom_rev
    ancestors = [{'path': [working_base, working_offset], 'revision': working_rev}]
    while True:
        # Get the first "svn log" entry for this path (relative to @rev)
        if debug:
            print(">> find_svn_ancestors: " + source_repos_url + working_path + "@" + str(working_rev) +
                  " (" + working_base + " " + working_offset + ")")
        log_entry = get_first_svn_log_entry(source_repos_url + working_path + "@" + str(working_rev), 1, str(working_rev))
        if not log_entry:
            return None
        followed_copy = False
        # Find the action for our working_path in this revision
        for d in log_entry['changed_paths']:
            path = d['path']
            # NOTE(review): this is a substring test, not a path-prefix test --
            # confirm that is intended.
            if path not in working_path:
                continue
            # Check action-type for this file
            action = d['action']
            if action not in 'MARD':
                display_error("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                              % (log_entry['revision'], action))
            if debug:
                debug_desc = ": " + action + " " + path
                if d['copyfrom_path']:
                    debug_desc += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
                print(debug_desc)

            if action in ('R', 'D'):
                # If file/folder was replaced or deleted, it has no ancestor
                return []
            if action == 'A':
                # If file/folder was added but not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    return []
                # Else, file/folder was added and is a copy, so check ancestors
                path_old = d['copyfrom_path']
                working_path = working_path.replace(path, path_old)
                if working_base in working_path:
                    # If the new and old working_path share the same working_base,
                    # just need to update working_offset.
                    working_offset = working_path[len(working_base)+1:]
                else:
                    # Else, assume that working_base has changed but working_offset
                    # is the same, e.g. a re-branch.
                    # TODO: Is this a safe assumption?!
                    working_base = working_path[:-len(working_offset)].rstrip('/')
                working_rev = d['copyfrom_revision']
                if debug:
                    print(">> find_svn_ancestors: copy-from: " + working_base + " " + working_offset + "@" + str(working_rev))
                ancestors.append({'path': [working_base, working_offset], 'revision': working_rev})
                # If we found a copy-from case which matches our source_base, we're done
                if (path_old == source_base) or (path_old.startswith(source_base + "/")):
                    return ancestors
                # Else, follow the copy and keep on searching
                followed_copy = True
                break
        if not followed_copy:
            # Nothing moved working_path/working_rev, so asking again would
            # return this very log entry forever. Give up instead.
            return None
464
def replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url, original_wc):
    """
    Given an array of ancestor info (find_svn_ancestors), replay the history
    to correctly track renames ("svn copy/move") across branch-merges.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn copy /branches/fix1/Proj1 /branches/fix1/Proj2 " Rename folder
    4. svn copy /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt " Rename file inside renamed folder
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'ancestors' is the array returned by find_svn_ancestors() with the final
    destination info appended to it by process_svn_log_entry().
    'source_repos_url' is the full URL to the root of the source repository.
    'target_url' and 'original_wc' are handed on to process_svn_log_entry().
    'source_url' is not used by this function.
    """
    # The first element (the original, pre-branch-copy trunk path) and the
    # last element (the final commit back to trunk) are not replayed.
    for idx in range(1, len(ancestors)-1):
        base, offset = ancestors[idx]['path'][0], ancestors[idx]['path'][1]
        full_path = base + "/" + offset
        rev_from = ancestors[idx]['revision']
        rev_next = ancestors[idx+1]['revision']
        # Do a "svn log" on the _parent_ directory of the path, since trying
        # to get log info for the "old path" on the revision where the
        # copy/move happened will fail.
        parent_path = full_path.rpartition('/')[0]
        if debug:
            print(">> replay_svn_ancestors: ["+str(idx)+"]" + full_path+"@"+str(rev_from) + " ["+parent_path+"@"+str(rev_from)+":"+str(rev_next-1)+"]")
        for log_entry in iter_svn_log_entries(source_repos_url+parent_path, rev_from, rev_next-1):
            process_svn_log_entry(log_entry, source_repos_url, source_repos_url+base, target_url, original_wc)
510
def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url, original_wc, source_offset=""):
    """
    Process SVN changes from the given log entry, applying them to the
    working copy in the current directory.
    Returns array of all the paths in the working-copy that were changed,
    i.e. the paths which need to be "svn commit" (at most 100 are recorded).

    'log_entry' is the array structure built by parse_svn_log_xml().
    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    'original_wc' is the path of the working copy of the source, used as the
    fallback copy when a merge reports a conflict.
    'source_offset' is only echoed in the "copy-to" progress message.
    """
    # Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
    source_base = source_url[len(source_repos_url):]
    if debug:
        print(">> process_svn_log_entry: " + source_url + " (" + source_base + ")")

    svn_rev = log_entry['revision']

    removed_paths = []
    modified_paths = []
    unrelated_paths = []
    commit_paths = []

    for d in log_entry['changed_paths']:
        if svnlog_verbose:
            msg = " " + d['action'] + " " + d['path']
            if d['copyfrom_path']:
                msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
            print(msg)
        # Get the full path for this changed_path
        # e.g. u'/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not path.startswith(source_base + "/"):
            # Ignore changed files that are not part of this subdir
            if path != source_base:
                print(">> process_svn_log_entry: Unrelated path: " + path + " (" + source_base + ")")
                unrelated_paths.append(path)
            continue
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. u'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path
        action = d['action']
        if action not in ('M', 'A', 'R', 'D'):
            display_error("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                          % (svn_rev, action))

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        # But, if we gather too many paths to commit, then the caller wipes
        # commit_paths and ends up doing a commit at the root of the working-copy.
        if len(commit_paths) < 100:
            commit_paths.append(path_offset)

        # Special-handling for replace's
        if action == 'R':
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            run_svn(["up", path_offset])
            run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            # Determine where to export from
            copyfrom_rev = svn_rev
            copyfrom_path = path
            svn_copy = False
            # Handle cases where this "add" was a copy from another URL in the source repos
            if d['copyfrom_revision']:
                copyfrom_rev = d['copyfrom_revision']
                copyfrom_path = d['copyfrom_path']
                print(">> process_svn_log_entry: copy-to: " + source_base + " " + source_offset + " " + path_offset)
                # Use a real path-prefix test (the same one applied to 'path'
                # above); a substring test would also match unrelated paths
                # that merely contain source_base somewhere.
                if copyfrom_path == source_base or copyfrom_path.startswith(source_base + "/"):
                    # If the copy-from path is inside the current working-copy, no need to check ancestry.
                    ancestors = []
                    copyfrom_path = copyfrom_path[len(source_base):].strip("/")
                    if debug:
                        print(">> process_svn_log_entry: Found copy: " + copyfrom_path+"@"+str(copyfrom_rev))
                    svn_copy = True
                else:
                    ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset,
                                                   copyfrom_path, copyfrom_rev)
                    if ancestors:
                        # Reverse the list, so that we loop in chronological order
                        ancestors.reverse()
                        # Append the current revision
                        ancestors.append({'path': [source_base, path_offset], 'revision': svn_rev})
                        # ancestors[0] is the original (pre-branch-copy) trunk path.
                        # ancestors[1] is the first commit on the new branch.
                        copyfrom_rev = ancestors[0]['revision']
                        copyfrom_base = ancestors[0]['path'][0]
                        copyfrom_offset = ancestors[0]['path'][1]
                        # Both parts have had their "/" stripped, so put the
                        # separator back when joining them.
                        copyfrom_path = copyfrom_base + "/" + copyfrom_offset
                        if debug:
                            print(">> process_svn_log_entry: FOUND PARENT:")
                            for idx, ancestor in enumerate(ancestors):
                                print(" ["+str(idx)+"] " + ancestor['path'][0]+" "+ancestor['path'][1]+"@"+str(ancestor['revision']))
                        copyfrom_path = copyfrom_path[len(source_base):].strip("/")
                        svn_copy = True
            # If this add was a copy-from, do a smart replay of the ancestors' history.
            # Else just copy/export the files from the source repo and "svn add" them.
            if svn_copy:
                if debug:
                    print(">> process_svn_log_entry: svn_copy: copy-from: " + copyfrom_path+"@"+str(copyfrom_rev) + " source_base: "+source_base + " len(ancestors): " + str(len(ancestors)))
                # If the copyfrom_path is inside the current working-copy, then do a
                # straight-up "svn copy" ...but not if the target is already tracked,
                # because this might run several times for the same path.
                if not ancestors:
                    # TODO: Is there a better way to avoid recursion bugs? Maybe a collection of processed paths?
                    if not in_svn(path_offset):
                        run_svn(["copy", copyfrom_path, path_offset])
                else:
                    # Replay any actions which happened to this folder from the ancestor path(s).
                    replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url, original_wc)
            else:
                # Create (parent) directory if needed
                if d['kind'] == 'dir':
                    p_path = path_offset
                else:
                    p_path = os.path.dirname(path_offset).strip() or '.'
                if not os.path.exists(p_path):
                    os.makedirs(p_path)
                # Export the entire added tree. Can't use shutil.copytree() from original_wc
                # since that would copy ".svn" folders on SVN pre-1.7. Also, in cases where the
                # copy-from is from some path in the source_repos _outside_ of our source_base,
                # original_wc won't even have the source files we want to copy.
                run_svn(["export", "--force", "-r", str(copyfrom_rev),
                         source_repos_url + copyfrom_path + "@" + str(copyfrom_rev), path_offset])
                run_svn(["add", "--parents", path_offset])
                # TODO: Need to copy SVN properties from source repos

        elif action == 'D':
            # Queue "svn remove" commands, to allow the action == 'A' handling the opportunity
            # to do smart "svn copy" handling on copy/move/renames.
            removed_paths.append(path_offset)

        elif action == 'M':
            modified_paths.append(path_offset)

        else:
            # Not reachable for validated actions ('R' was turned into 'A'
            # above); kept as a safety net.
            display_error("Internal Error: pull_svn_rev: Unhandled 'action' value: '" + action + "'")

    for r in removed_paths:
        # TODO: Is the "svn up" here needed?
        run_svn(["up", r])
        run_svn(["remove", "--force", r])

    for m in modified_paths:
        # TODO: Is the "svn up" here needed?
        run_svn(["up", m])
        m_url = source_url + "/" + m
        out = run_svn(["merge", "-c", str(svn_rev), "--non-recursive",
                       "--non-interactive", "--accept=theirs-full",
                       m_url+"@"+str(svn_rev), m])
        # if conflicts, use the copy from original_wc
        # TODO: Is this handling even needed, now that we're passing --accept=theirs-full?
        if out and out.split()[0] == 'C':
            print("\n### Conflicts ignored: %s, in revision: %s\n"
                  % (m, svn_rev))
            run_svn(["revert", "--recursive", m])
            if os.path.isfile(m):
                shutil.copy(original_wc + os.sep + m, m)

    if unrelated_paths:
        print("Unrelated paths: (vs. '" + source_base + "')")
        print("* " + str(unrelated_paths))

    return commit_paths
694
def pull_svn_rev(log_entry, source_repos_url, source_url, target_url, original_wc, keep_author=False):
    """
    Pull SVN changes from the given log entry and commit them.

    Updates 'original_wc' to the entry's revision, applies the entry to the
    working copy in the current directory via process_svn_log_entry(), then
    commits. If the commit fails, property-conflict (".prej") files for the
    entry's paths are moved aside and the commit is retried once; when no
    such conflict file is found the failure is re-raised.
    Returns nothing.
    """
    # Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
    source_base = source_url[len(source_repos_url):]

    svn_rev = log_entry['revision']
    run_svn(["up", "--ignore-externals", "-r", svn_rev, original_wc])
    commit_paths = process_svn_log_entry(log_entry, source_repos_url,
                                         source_url, target_url, original_wc)

    # If we had too many individual paths to commit, wipe the list and just commit at
    # the root of the working copy.
    if len(commit_paths) > 99:
        commit_paths = []

    try:
        commit_from_svn_log_entry(log_entry, commit_paths,
                                  keep_author=keep_author)
    except ExternalCommandFailed:
        # try to ignore the Properties conflicts on files and dirs
        # use the copy from original_wc
        # TODO: Need to re-work this?
        has_Conflict = False
        # Conflict files are moved next to original_wc, not inside it.
        prej_dir = os.path.dirname(original_wc)
        for d in log_entry['changed_paths']:
            p = d['path']
            p = p[len(source_base):].strip("/")
            # Flattened name for the saved conflict file
            flat_name = os.sep + p.replace('_', '__').replace('/', '_')
            if os.path.isfile(p):
                if os.path.isfile(p + ".prej"):
                    has_Conflict = True
                    shutil.copy(original_wc + os.sep + p, p)
                    p2 = flat_name + ".prej-" + str(svn_rev)
                    shutil.move(p + ".prej", prej_dir + p2)
                    w = "\n### Properties conflicts ignored:"
                    print("%s %s, in revision: %s\n" % (w, p, svn_rev))
            elif os.path.isdir(p):
                if os.path.isfile(p + os.sep + "dir_conflicts.prej"):
                    has_Conflict = True
                    p2 = flat_name + "_dir__conflicts.prej-" + str(svn_rev)
                    shutil.move(p + os.sep + "dir_conflicts.prej",
                                prej_dir + p2)
                    w = "\n### Properties conflicts ignored:"
                    print("%s %s, in revision: %s\n" % (w, p, svn_rev))
                    # Re-apply the directory properties from original_wc.
                    # The externals property is named "svn:externals".
                    for prop_name in ("svn:ignore", "svn:externals"):
                        out = run_svn(["propget", prop_name,
                                       original_wc + os.sep + p])
                        if out:
                            run_svn(["propset", prop_name, out.strip(), p])
        # try again
        if has_Conflict:
            commit_from_svn_log_entry(log_entry, commit_paths,
                                      keep_author=keep_author)
        else:
            # No property conflict to explain the failure: propagate it.
            raise
757
758
def main():
    """
    Command-line entry point: replay the history of <Source SVN URL> into
    <Target SVN URL>.

    Unless continuing from a previous break (-c, only honoured when the
    "_dup_wc" directory exists), the starting revision of the source is
    checked out into "_original_wc" and imported into the target, and the
    target is checked out into "_dup_wc". Every later source revision up to
    the one that is HEAD at start-up is then replayed with pull_svn_rev()
    from inside "_dup_wc".
    """
    usage = "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
    parser = OptionParser(usage)
    parser.add_option("-a", "--keep-author", action="store_true",
                      dest="keep_author", help="Keep revision Author or not")
    parser.add_option("-c", "--continue-from-break", action="store_true",
                      dest="cont_from_break",
                      help="Continue from previous break")
    parser.add_option("-r", "--svn-rev", type="int", dest="svn_rev",
                      help="SVN revision to checkout from")
    (options, args) = parser.parse_args()
    if len(args) != 2:
        display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
                      False)

    source_url = args.pop(0).rstrip("/")
    target_url = args.pop(0).rstrip("/")
    keep_author = bool(options.keep_author)

    # Find the greatest_rev
    # don't use 'svn info' to get greatest_rev, it doesn't work sometimes
    svn_log = get_one_svn_log_entry(source_url, "HEAD", "HEAD")
    greatest_rev = svn_log['revision']

    original_wc = "_original_wc"
    dup_wc = "_dup_wc"

    ## old working copy does not exist, disable continue mode
    if not os.path.exists(dup_wc):
        options.cont_from_break = False

    if not options.cont_from_break:
        # Warn if Target SVN URL existed
        cmd = find_program("svn")
        pipe = Popen([cmd] + ["list"] + [target_url], executable=cmd,
                     stdout=PIPE, stderr=PIPE)
        out, err = pipe.communicate()
        if pipe.returncode == 0:
            print("Target SVN URL: %s existed!" % target_url)
            if out:
                print(out)
            print("Press 'Enter' to Continue, 'Ctrl + C' to Cancel...")
            print("(Timeout in 5 seconds)")
            # Wait for input on stdin, for at most 5 seconds.
            select.select([sys.stdin], [], [], 5)

        # Get log entry for the SVN revision we will check out
        if options.svn_rev:
            # If specify a rev, get log entry just before or at rev
            svn_start_log = get_last_svn_log_entry(source_url, 1,
                                                   options.svn_rev)
        else:
            # Otherwise, get log entry of branch creation
            svn_start_log = get_first_svn_log_entry(source_url, 1,
                                                    greatest_rev)

        # This is the revision we will checkout from
        svn_rev = svn_start_log['revision']

        # Check out first revision (changeset) from Source SVN URL
        if os.path.exists(original_wc):
            shutil.rmtree(original_wc)
        svn_checkout(source_url, original_wc, svn_rev)

        # Import first revision (changeset) into Target SVN URL
        # TODO: Rather than using "svn import" here, use "svn export" + "svn add"
        # so that we can uniformly run a pre-commit clean-up script.
        timestamp = int(svn_start_log['date'])
        svn_date = str(datetime.fromtimestamp(timestamp))
        # The starting revision may have no author (None); in that case
        # neither "--username" nor an "Author:" line is emitted.
        start_author = svn_start_log['author']
        import_message = svn_start_log['message'] + "\nDate: " + svn_date
        if start_author and not keep_author:
            import_message += "\nAuthor: " + start_author
        import_args = ["import", original_wc, target_url, "-m", import_message]
        if start_author and keep_author:
            import_args += ["--username", start_author]
        run_svn(import_args)

        # Check out a working copy
        if os.path.exists(dup_wc):
            shutil.rmtree(dup_wc)
        svn_checkout(target_url, dup_wc)

    original_wc = os.path.abspath(original_wc)
    dup_wc = os.path.abspath(dup_wc)
    os.chdir(dup_wc)

    # Get SVN info
    svn_info = get_svn_info(source_url)
    # Get the base URL for the source repos
    # e.g. u'svn://svn.example.com/svn/repo'
    source_repos_url = svn_info['repos_url']

    if options.cont_from_break:
        # NOTE(review): this resumes from the revision reported for source_url,
        # not from the state of either working copy -- confirm that is the
        # intended resume point.
        svn_rev = svn_info['revision'] - 1
        if svn_rev < 1:
            svn_rev = 1

    # Load SVN log starting from svn_rev + 1
    it_log_entries = iter_svn_log_entries(source_url, svn_rev + 1, greatest_rev)

    try:
        for log_entry in it_log_entries:
            pull_svn_rev(log_entry, source_repos_url, source_url, target_url,
                         original_wc, keep_author)

    except KeyboardInterrupt:
        print("\nStopped by user.")
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
    except:
        # Deliberately broad: report whatever went wrong, then put the
        # working copy back into a clean state.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
    finally:
        run_svn(["up"])
        print("\nFinished!")


if __name__ == "__main__":
    main()
883
884 # vim:sts=4:sw=4: