#!/usr/bin/env python
"""
svn2svn.py

Replicate (replay) changesets from one SVN repository to another:
* Maintains full logical history (e.g. uses "svn copy" for renames).
* Maintains original commit messages.
* Cannot maintain original commit dates, but appends the original commit
  date to each commit message: "Date: <original date>".
* Optionally maintains source author info. (Only supported if accessing
  target SVN repo via file://)
* Optionally runs an external shell script before each replayed commit
  to give the ability to dynamically exclude or modify files as part
  of the replay.

License: GPLv2, the same as hgsvn.
Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
(This is a forked and modified version of http://code.google.com/p/svn2svn/)
"""

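# Example usage (illustrative; the repository URLs below are hypothetical
# placeholders -- see main() for the actual option definitions):
#
#   svn2svn.py -a file:///var/svn/source/trunk file:///var/svn/target/trunk
#
# -a/--keep-author keeps the original author (per the note above, only
# supported when the target repo is accessed via file://), -c resumes a
# previously interrupted run, and -r N starts from the last source revision
# at or before N instead of the branch-creation revision.
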
import os
import sys
import time
import locale
import shutil
import select
import calendar
import traceback

from optparse import OptionParser
from subprocess import Popen, PIPE
from datetime import datetime

try:
    from xml.etree import cElementTree as ET
except ImportError:
    try:
        from xml.etree import ElementTree as ET
    except ImportError:
        try:
            import cElementTree as ET
        except ImportError:
            from elementtree import ElementTree as ET

svn_log_args = ['log', '--xml', '-v']
svn_info_args = ['info', '--xml']
svn_checkout_args = ['checkout', '-q']
svn_status_args = ['status', '--xml', '-v', '--ignore-externals']

# Setup debug options
debug = False
debug_runsvn_timing = False   # Display how long each "svn" OS command took to run?
# Setup verbosity options
runsvn_showcmd = False        # Display every "svn" OS command we run?
runsvn_showout = False        # Display the stdout results from every "svn" OS command we run?
svnlog_verbose = True         # Display each action + changed-path as we walk the history?

# define exception class
class ExternalCommandFailed(RuntimeError):
    """
    An external command failed.
    """

def display_error(message, raise_exception=True):
    """
    Display error message, then terminate.
    """
    print "Error:", message
    print
    if raise_exception:
        raise ExternalCommandFailed
    else:
        sys.exit(1)

# Windows compatibility code by Bill Baxter
if os.name == "nt":
    def find_program(name):
        """
        Find the name of the program for Popen.
        Windows is finicky about having the complete file name. Popen
        won't search the %PATH% for you automatically.
        (Adapted from ctypes.find_library)
        """
        # See MSDN for the REAL search order.
        base, ext = os.path.splitext(name)
        if ext:
            exts = [ext]
        else:
            exts = ['.bat', '.exe']
        for directory in os.environ['PATH'].split(os.pathsep):
            for e in exts:
                fname = os.path.join(directory, base + e)
                if os.path.exists(fname):
                    return fname
        return None
else:
    def find_program(name):
        """
        Find the name of the program for Popen.
        On Unix, Popen isn't picky about needing absolute paths.
        """
        return name

def shell_quote(s):
    if os.name == "nt":
        q = '"'
    else:
        q = "'"
    return q + s.replace('\\', '\\\\').replace("'", "'\"'\"'") + q

locale_encoding = locale.getpreferredencoding()

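# Note: shell_quote() is only used to build the human-readable command string
# that run_svn() prints/logs; the actual command is passed to Popen as an
# argument list. Illustrative example (POSIX): shell_quote("file name.txt")
# returns "'file name.txt'".
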
def run_svn(args, fail_if_stderr=False, encoding="utf-8"):
    """
    Run an "svn" command with the given arguments via Popen (no shell).
    Terminate if the command fails.
    """
    def _transform_arg(a):
        if isinstance(a, unicode):
            a = a.encode(encoding or locale_encoding)
        elif not isinstance(a, str):
            a = str(a)
        return a
    t_args = map(_transform_arg, args)

    cmd = find_program("svn")
    cmd_string = str(" ".join(map(shell_quote, [cmd] + t_args)))
    if runsvn_showcmd:
        print "$", "("+os.getcwd()+")", cmd_string
    if debug_runsvn_timing:
        time1 = time.time()
    pipe = Popen([cmd] + t_args, executable=cmd, stdout=PIPE, stderr=PIPE)
    out, err = pipe.communicate()
    if debug_runsvn_timing:
        time2 = time.time()
        print "(" + str(round(time2-time1, 4)) + " elapsed)"
    if out and runsvn_showout:
        print out
    if pipe.returncode != 0 or (fail_if_stderr and err.strip()):
        display_error("External program failed (return code %d): %s\n%s"
            % (pipe.returncode, cmd_string, err))
    return out

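# Illustrative example: fetch "svn info" XML for the current directory,
# treating any stderr output as fatal (this mirrors get_svn_info() below):
#
#   xml_string = run_svn(svn_info_args + ['.'], fail_if_stderr=True)
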
def svn_date_to_timestamp(svn_date):
    """
    Parse an SVN date as read from the XML output and
    return the corresponding timestamp.
    """
    # Strip microseconds and timezone (always UTC, hopefully)
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    date = svn_date.split('.', 2)[0]
    time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(time_tuple)

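# Illustrative example: svn_date_to_timestamp("2011-02-09T18:30:00.123456Z")
# parses the "2011-02-09T18:30:00" portion as UTC and returns the
# corresponding POSIX timestamp via calendar.timegm().
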
def parse_svn_info_xml(xml_string):
    """
    Parse the XML output from an "svn info" command and extract
    useful information as a dict.
    """
    d = {}
    tree = ET.fromstring(xml_string)
    entry = tree.find('.//entry')
    if entry is not None:
        d['url'] = entry.find('url').text
        d['revision'] = int(entry.get('revision'))
        d['repos_url'] = tree.find('.//repository/root').text
        d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
        d['kind'] = entry.get('kind')
    return d

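# Illustrative return value (field values are hypothetical):
#
#   {'url': 'svn://svn.example.com/svn/repo/trunk',
#    'revision': 1234,
#    'repos_url': 'svn://svn.example.com/svn/repo',
#    'last_changed_rev': 1230,
#    'kind': 'dir'}
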
def parse_svn_log_xml(xml_string):
    """
    Parse the XML output from an "svn log" command and extract
    useful information as a list of dicts (one per log changeset).
    """
    l = []
    tree = ET.fromstring(xml_string)
    for entry in tree.findall('logentry'):
        d = {}
        d['revision'] = int(entry.get('revision'))
        # Some revisions don't have authors, most notably
        # the first revision in a repository.
        author = entry.find('author')
        d['author'] = author is not None and author.text or None
        d['date'] = svn_date_to_timestamp(entry.find('date').text)
        # Some revisions may have an empty commit message.
        message = entry.find('msg')
        message = message is not None and message.text is not None \
                        and message.text.strip() or ""
        # Replace DOS line-endings '\r\n' and MacOS line-endings '\r' with Unix '\n'
        d['message'] = message.replace('\r\n', '\n').replace('\n\r', '\n'). \
                        replace('\r', '\n')
        paths = d['changed_paths'] = []
        for path in entry.findall('.//path'):
            copyfrom_rev = path.get('copyfrom-rev')
            if copyfrom_rev:
                copyfrom_rev = int(copyfrom_rev)
            paths.append({
                'path': path.text,
                'kind': path.get('kind'),
                'action': path.get('action'),
                'copyfrom_path': path.get('copyfrom-path'),
                'copyfrom_revision': copyfrom_rev,
            })
        # Sort paths into hierarchical (path) order, so that process_svn_log_entry()
        # can process actions in depth-first order.
        paths.sort(key=lambda p: p['path'])
        l.append(d)
    return l

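# Illustrative shape of a single returned log entry (values are hypothetical):
#
#   {'revision': 123,
#    'author': 'alice',
#    'date': 1297276200,                # POSIX timestamp
#    'message': 'Fix bug in projectA',
#    'changed_paths': [{'path': '/trunk/projectA/file1.txt',
#                       'kind': 'file',
#                       'action': 'M',
#                       'copyfrom_path': None,
#                       'copyfrom_revision': None}]}
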
def parse_svn_status_xml(xml_string, base_dir=None):
    """
    Parse the XML output from an "svn status" command and extract
    useful info as a list of dicts (one per status entry).
    """
    l = []
    tree = ET.fromstring(xml_string)
    for entry in tree.findall('.//entry'):
        d = {}
        path = entry.get('path')
        if base_dir is not None:
            assert path.startswith(base_dir)
            path = path[len(base_dir):].lstrip('/\\')
        d['path'] = path
        wc_status = entry.find('wc-status')
        if wc_status.get('item') == 'external':
            d['type'] = 'external'
        elif wc_status.get('revision') is not None:
            d['type'] = 'normal'
        else:
            d['type'] = 'unversioned'
        l.append(d)
    return l

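# Illustrative return value (paths are hypothetical); 'path' is relative to
# base_dir when base_dir is given:
#
#   [{'path': 'projectA/file1.txt', 'type': 'normal'},
#    {'path': 'projectA/scratch.txt', 'type': 'unversioned'}]
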
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Get SVN information for the given URL or working copy,
    with an optionally specified revision number.
    Returns a dict as created by parse_svn_info_xml().
    """
    if rev_number is not None:
        args = [svn_url_or_wc + "@" + str(rev_number)]
    else:
        args = [svn_url_or_wc]
    xml_string = run_svn(svn_info_args + args, fail_if_stderr=True)
    return parse_svn_info_xml(xml_string)

def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Checkout the given URL at an optional revision number.
    """
    args = []
    if rev_number is not None:
        args += ['-r', rev_number]
    args += [svn_url, checkout_dir]
    return run_svn(svn_checkout_args + args)

def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False):
    """
    Fetch up to 'limit' SVN log entries between the given revisions.
    """
    if stop_on_copy:
        args = ['--stop-on-copy']
    else:
        args = []
    url = str(svn_url_or_wc)
    if rev_start != 'HEAD' and rev_end != 'HEAD':
        args += ['-r', '%s:%s' % (rev_start, rev_end)]
        if "@" not in svn_url_or_wc:
            url += "@" + str(max(rev_start, rev_end))
    args += ['--limit', str(limit), url]
    xml_string = run_svn(svn_log_args + args)
    return parse_svn_log_xml(xml_string)

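# Illustrative example: run_svn_log(url, 100, 200, 10) runs roughly
#
#   svn log --xml -v -r 100:200 --limit 10 <url>@200
#
# i.e. the peg revision is pinned to max(rev_start, rev_end) unless the
# URL already contains an "@".
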
def get_svn_status(svn_wc, flags=None):
    """
    Get SVN status information about the given working copy.
    """
    # Ensure proper stripping by canonicalizing the path
    svn_wc = os.path.abspath(svn_wc)
    args = []
    if flags:
        args += [flags]
    args += [svn_wc]
    xml_string = run_svn(svn_status_args + args)
    return parse_svn_status_xml(xml_string, svn_wc)

def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False):
    """
    Get the first SVN log entry in the requested revision range.
    """
    entries = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy)
    if not entries:
        display_error("No SVN log for %s between revisions %s and %s" %
                      (svn_url, rev_start, rev_end))
    return entries[0]

def get_first_svn_log_entry(svn_url, rev_start, rev_end):
    """
    Get the first log entry at or after the given starting revision in an SVN branch.
    Passing rev_start=1 gives the log entry corresponding to the branch creation.

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect the elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True)

def get_last_svn_log_entry(svn_url, rev_start, rev_end):
    """
    Get the last log entry at or before the given ending revision in an SVN branch.
    Passing rev_end=HEAD gives the log entry corresponding to the latest commit
    on the branch.
    """
    return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True)


log_duration_threshold = 10.0
log_min_chunk_length = 10

def iter_svn_log_entries(svn_url, first_rev, last_rev):
    """
    Iterate over SVN log entries between first_rev and last_rev.

    This function features chunked log fetching so that it isn't too nasty
    to the SVN server if many entries are requested.
    """
    cur_rev = first_rev
    chunk_length = log_min_chunk_length
    chunk_interval_factor = 1.0
    while last_rev == "HEAD" or cur_rev <= last_rev:
        start_t = time.time()
        stop_rev = min(last_rev, cur_rev + int(chunk_length * chunk_interval_factor))
        entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length)
        duration = time.time() - start_t
        if not entries:
            if stop_rev == last_rev:
                break
            cur_rev = stop_rev + 1
            chunk_interval_factor *= 2.0
            continue
        for e in entries:
            yield e
            cur_rev = e['revision'] + 1
        # Adapt chunk length based on measured request duration
        if duration < log_duration_threshold:
            chunk_length = int(chunk_length * 2.0)
        elif duration > log_duration_threshold * 2:
            chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))

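# Usage sketch (illustrative; revision bounds are hypothetical). main() below
# drives the replay loop off this iterator:
#
#   for log_entry in iter_svn_log_entries('svn://svn.example.com/svn/repo/trunk', 1, 1500):
#       print log_entry['revision']
#
# The chunk size grows or shrinks based on how long each "svn log" round-trip
# takes, so long histories are fetched in several smaller requests.
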
def commit_from_svn_log_entry(entry, files=None, keep_author=False):
    """
    Given an SVN log entry and an optional sequence of files, do an svn commit.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # This will use the local timezone for displaying commit times
    timestamp = int(entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    # Uncomment this line if you prefer UTC commit times
    #svn_date = "%d 0" % timestamp
    if keep_author:
        options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date, "--username", entry['author']]
    else:
        options = ["ci", "--force-log", "-m", entry['message'] + "\nDate: " + svn_date + "\nAuthor: " + entry['author']]
    if files:
        options += list(files)
    run_svn(options)

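# Illustrative commit message produced when keep_author is False, assuming
# entry['message'] == 'Fix bug' and entry['author'] == 'alice' (the Date line
# is rendered in local time):
#
#   Fix bug
#   Date: 2011-02-09 18:30:00
#   Author: alice
#
# With keep_author=True the Author line is omitted and "--username alice" is
# passed to "svn ci" instead.
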
def in_svn(p):
    """
    Check if a given file/folder is being tracked by Subversion.
    Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
    With SVN 1.7 and beyond, WC-NG means there is only a single top-level ".svn"
    at the root of the working-copy, so use "svn status" to check the status
    of the file/folder.
    """
    # TODO: Is there a better way to do this?
    entries = get_svn_status(p)
    if not entries:
        return False
    d = entries[0]
    return (d['type'] == 'normal')

def find_svn_ancestors(source_repos_url, source_base, source_offset, copyfrom_path, copyfrom_rev):
    """
    Given a copy-from path (copyfrom_path), walk the SVN history backwards to inspect
    the ancestry of that path. Build a collection of copyfrom_path+revision pairs
    for each of the branch-copies since the initial branch-creation. If we find a
    copyfrom_path which matches (or falls under) source_base (e.g. we crawled back
    to the initial branch-copy from trunk), then return the collection of ancestor
    paths. Otherwise, copyfrom_path has no ancestry compared to source_base.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    PARAMETERS:
    * source_repos_url = Full URL to root of repository, e.g. 'file:///path/to/repos'
    * source_base = e.g. '/trunk'
    * source_offset = e.g. 'projectA/file1.txt'
    * copyfrom_path = e.g. '/branches/bug123/projectA/file1.txt'
    """

    done = False
    working_path = copyfrom_path
    working_base = copyfrom_path[:-len(source_offset)].rstrip('/')
    working_offset = source_offset.strip('/')
    working_rev = copyfrom_rev
    ancestors = [{'path': [working_base, working_offset], 'revision': working_rev}]
    while not done:
        # Get the first "svn log" entry for this path (relative to @rev)
        #working_path = working_base + "/" + working_offset
        if debug:
            print ">> find_svn_ancestors: " + source_repos_url + working_path + "@" + str(working_rev) + \
                  " (" + working_base + " " + working_offset + ")"
        log_entry = get_first_svn_log_entry(source_repos_url + working_path + "@" + str(working_rev), 1, str(working_rev))
        if not log_entry:
            done = True
        # Find the action for our working_path in this revision
        for d in log_entry['changed_paths']:
            path = d['path']
            if path not in working_path:
                continue
            # Check action-type for this file
            action = d['action']
            if action not in 'MARD':
                display_error("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                              % (log_entry['revision'], action))
            if debug:
                debug_desc = ": " + action + " " + path
                if d['copyfrom_path']:
                    debug_desc += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
                print debug_desc

            if action == 'R':
                # If file/folder was replaced, it has no ancestor
                return []
            if action == 'D':
                # If file/folder was deleted, it has no ancestor
                return []
            if action == 'A':
                # If file/folder was added but is not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    return []
                # Else, file/folder was added and is a copy, so check ancestors
                path_old = d['copyfrom_path']
                working_path = working_path.replace(path, path_old)
                if working_base in working_path:
                    # If the new and old working_path share the same working_base, just update working_offset.
                    working_offset = working_path[len(working_base)+1:]
                else:
                    # Else, assume that working_base has changed but working_offset is the same, e.g. a re-branch.
                    # TODO: Is this a safe assumption?!
                    working_base = working_path[:-len(working_offset)].rstrip('/')
                working_rev = d['copyfrom_revision']
                if debug:
                    print ">> find_svn_ancestors: copy-from: " + working_base + " " + working_offset + "@" + str(working_rev)
                ancestors.append({'path': [working_base, working_offset], 'revision': working_rev})
                # If we found a copy-from case which matches our source_base, we're done
                if (path_old == source_base) or (path_old.startswith(source_base + "/")):
                    return ancestors
                # Else, follow the copy and keep on searching
                break
    return None

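# Illustrative return value (paths/revisions are hypothetical). The newest
# (branch) entry comes first and the oldest (trunk) entry last, since the list
# is built while walking backwards through history:
#
#   [{'path': ['/branches/bug123', 'projectA/file1.txt'], 'revision': 1210},
#    {'path': ['/trunk', 'projectA/file1.txt'], 'revision': 1200}]
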
def replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url):
    """
    Given an array of ancestor info (find_svn_ancestors), replay the history
    to correctly track renames ("svn copy/move") across branch-merges.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn copy /branches/fix1/Proj1 /branches/fix1/Proj2    (rename folder)
    4. svn copy /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt    (rename file inside renamed folder)
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'source_repos_url' is the full URL to the root of the source repository.
    'ancestors' is the array returned by find_svn_ancestors() with the final
    destination info appended to it by process_svn_log_entry().
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    """
    # Ignore ancestors[0], which is the original (pre-branch-copy) trunk path
    # Ignore ancestors[1], which is the original branch-creation commit
    # Ignore ancestors[n], which is the final commit back to trunk
    for idx in range(1, len(ancestors)-1):
        ancestor = ancestors[idx]
        source_base = ancestor['path'][0]
        source_offset = ancestor['path'][1]
        source_path = source_base + "/" + source_offset
        source_rev = ancestor['revision']
        source_rev_next = ancestors[idx+1]['revision']
        # Do a "svn log" on the _parent_ directory of source_path, since trying to get log info
        # for the "old path" on the revision where the copy/move happened will fail.
        if "/" in source_path:
            p_source_path = source_path[:source_path.rindex('/')]
        else:
            p_source_path = ""
        if debug:
            print ">> replay_svn_ancestors: ["+str(idx)+"]" + source_path+"@"+str(source_rev) + " ["+p_source_path+"@"+str(source_rev)+":"+str(source_rev_next-1)+"]"
        it_log_entries = iter_svn_log_entries(source_repos_url+p_source_path, source_rev, source_rev_next-1)
        for log_entry in it_log_entries:
            #print ">> replay_svn_ancestors: log_entry: (" + source_repos_url+source_base + ")"
            #print log_entry
            process_svn_log_entry(log_entry, source_repos_url, source_repos_url+source_base, target_url)

def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url, source_offset=""):
    """
    Process SVN changes from the given log entry.
    Returns an array of all the paths in the working-copy that were changed,
    i.e. the paths which need to be passed to "svn commit".

    'log_entry' is the array structure built by parse_svn_log_xml().
    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    """
    # Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
    source_base = source_url[len(source_repos_url):]
    if debug:
        print ">> process_svn_log_entry: " + source_url + " (" + source_base + ")"

    svn_rev = log_entry['revision']

    removed_paths = []
    modified_paths = []
    unrelated_paths = []
    commit_paths = []

    for d in log_entry['changed_paths']:
        if svnlog_verbose:
            msg = " " + d['action'] + " " + d['path']
            if d['copyfrom_path']:
                msg += " (from " + d['copyfrom_path'] + "@" + str(d['copyfrom_revision']) + ")"
            print msg
        # Get the full path for this changed_path
        # e.g. u'/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not path.startswith(source_base + "/"):
            # Ignore changed files that are not part of this subdir
            if path != source_base:
                print ">> process_svn_log_entry: Unrelated path: " + path + " (" + source_base + ")"
                unrelated_paths.append(path)
            continue
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. u'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path
        action = d['action']
        if action not in 'MARD':
            display_error("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                          % (svn_rev, action))

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        # But, if we gather too many paths to commit, then we wipe commit_paths below
        # and end up doing a commit at the root of the working-copy.
        if len(commit_paths) < 100:
            commit_paths.append(path_offset)

        # Special handling for replaces
        if action == 'R':
            # If the file was "replaced" (deleted then re-added, all in the same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            run_svn(["up", path_offset])
            run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            # Determine where to export from
            copyfrom_rev = svn_rev
            copyfrom_path = path
            svn_copy = False
            # Handle cases where this "add" was a copy from another URL in the source repos
            if d['copyfrom_revision']:
                copyfrom_rev = d['copyfrom_revision']
                copyfrom_path = d['copyfrom_path']
                if debug:
                    print ">> process_svn_log_entry: copy-to: " + source_base + " " + source_offset + " " + path_offset
                if source_base in copyfrom_path:
                    # If the copy-from path is inside the current working-copy, no need to check ancestry.
                    ancestors = []
                    copyfrom_path = copyfrom_path[len(source_base):].strip("/")
                    if debug:
                        print ">> process_svn_log_entry: Found copy: " + copyfrom_path+"@"+str(copyfrom_rev)
                    svn_copy = True
                else:
                    ancestors = find_svn_ancestors(source_repos_url, source_base, path_offset,
                                                   copyfrom_path, copyfrom_rev)
                    if ancestors:
                        # Reverse the list, so that we loop in chronological order
                        ancestors.reverse()
                        # Append the current revision
                        ancestors.append({'path': [source_base, path_offset], 'revision': svn_rev})
                        # ancestors[0] is the original (pre-branch-copy) trunk path.
                        # ancestors[1] is the first commit on the new branch.
                        copyfrom_rev = ancestors[0]['revision']
                        copyfrom_base = ancestors[0]['path'][0]
                        copyfrom_offset = ancestors[0]['path'][1]
                        copyfrom_path = copyfrom_base + copyfrom_offset
                        if debug:
                            print ">> process_svn_log_entry: FOUND PARENT:"
                            for idx in range(0, len(ancestors)):
                                ancestor = ancestors[idx]
                                print "  ["+str(idx)+"] " + ancestor['path'][0]+" "+ancestor['path'][1]+"@"+str(ancestor['revision'])
                        #print ">> process_svn_log_entry: copyfrom_path (before): " + copyfrom_path + " source_base: " + source_base + " p: " + p
                        copyfrom_path = copyfrom_path[len(source_base):].strip("/")
                        #print ">> process_svn_log_entry: copyfrom_path (after): " + copyfrom_path
                        svn_copy = True
            # If this add was a copy-from, do a smart replay of the ancestors' history.
            # Else, just copy/export the files from the source repo and "svn add" them.
            if svn_copy:
                if debug:
                    print ">> process_svn_log_entry: svn_copy: copy-from: " + copyfrom_path+"@"+str(copyfrom_rev) + " source_base: "+source_base + " len(ancestors): " + str(len(ancestors))
                # If we don't have any ancestors, then this is just a straight "svn copy" in the current working-copy.
                if not ancestors:
                    # ...but not if the target is already tracked, because this might run several times for the same path.
                    # TODO: Is there a better way to avoid recursion bugs? Maybe a collection of processed paths?
                    if not in_svn(path_offset):
                        run_svn(["copy", copyfrom_path, path_offset])
                else:
                    # Replay any actions which happened to this folder from the ancestor path(s).
                    replay_svn_ancestors(ancestors, source_repos_url, source_url, target_url)
            else:
                # Create (parent) directory if needed
                if d['kind'] == 'dir':
                    p_path = path_offset
                else:
                    p_path = os.path.dirname(path_offset).strip() or '.'
                if not os.path.exists(p_path):
                    os.makedirs(p_path)
                # Export the entire added tree.
                run_svn(["export", "--force", "-r", str(copyfrom_rev),
                         source_repos_url + copyfrom_path + "@" + str(copyfrom_rev), path_offset])
                if not in_svn(path_offset):
                    run_svn(["add", "--parents", path_offset])
                # TODO: Need to copy SVN properties from source repos

        elif action == 'D':
            # Queue "svn remove" commands, to give the action == 'A' handling the opportunity
            # to do smart "svn copy" handling on copy/move/renames.
            removed_paths.append(path_offset)

        elif action == 'R':
            # TODO
            display_error("Internal Error: Handling for action='R' not implemented yet.")

        elif action == 'M':
            modified_paths.append(path_offset)

        else:
            display_error("Internal Error: pull_svn_rev: Unhandled 'action' value: '" + action + "'")

    if removed_paths:
        for r in removed_paths:
            # TODO: Is the "svn up" here needed?
            run_svn(["up", r])
            run_svn(["remove", "--force", r])

    if modified_paths:
        for m in modified_paths:
            # TODO: Is the "svn up" here needed?
            run_svn(["up", m])
            m_url = source_url + "/" + m
            out = run_svn(["merge", "-c", str(svn_rev), "--non-recursive",
                           "--non-interactive", "--accept=theirs-full",
                           m_url+"@"+str(svn_rev), m])

    if unrelated_paths:
        print "Unrelated paths: (vs. '" + source_base + "')"
        print "*", unrelated_paths

    return commit_paths

def pull_svn_rev(log_entry, source_repos_url, source_url, target_url, keep_author=False):
    """
    Pull SVN changes from the given log entry.
    Returns the new SVN revision.
    If an exception occurs, it will rollback to revision 'svn_rev - 1'.
    """
    ## Get the relative offset of source_url based on source_repos_url, e.g. u'/branches/bug123'
    #source_base = source_url[len(source_repos_url):]

    svn_rev = log_entry['revision']
    print "\n(Starting source rev #"+str(svn_rev)+":)"
    print "r"+str(log_entry['revision']) + " | " + \
          log_entry['author'] + " | " + \
          str(datetime.fromtimestamp(int(log_entry['date'])).isoformat(' '))
    print log_entry['message']
    print "------------------------------------------------------------------------"
    commit_paths = process_svn_log_entry(log_entry, source_repos_url, source_url, target_url)

    # If we had too many individual paths to commit, wipe the list and just commit at
    # the root of the working copy.
    if len(commit_paths) > 99:
        commit_paths = []

    # TODO: Use SVN properties to track source URL + rev in the target repo?
    #       This would provide more reliable resume support.
    try:
        commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
    except ExternalCommandFailed:
        # try to ignore the Properties conflicts on files and dirs
        # use the copy from original_wc
        # TODO: Need to re-work this?
        #has_Conflict = False
        #for d in log_entry['changed_paths']:
        #    p = d['path']
        #    p = p[len(source_base):].strip("/")
        #    if os.path.isfile(p):
        #        if os.path.isfile(p + ".prej"):
        #            has_Conflict = True
        #            shutil.copy(original_wc + os.sep + p, p)
        #            p2=os.sep + p.replace('_', '__').replace('/', '_') \
        #               + ".prej-" + str(svn_rev)
        #            shutil.move(p + ".prej", os.path.dirname(original_wc) + p2)
        #            w="\n### Properties conflicts ignored:"
        #            print "%s %s, in revision: %s\n" % (w, p, svn_rev)
        #    elif os.path.isdir(p):
        #        if os.path.isfile(p + os.sep + "dir_conflicts.prej"):
        #            has_Conflict = True
        #            p2=os.sep + p.replace('_', '__').replace('/', '_') \
        #               + "_dir__conflicts.prej-" + str(svn_rev)
        #            shutil.move(p + os.sep + "dir_conflicts.prej",
        #                        os.path.dirname(original_wc) + p2)
        #            w="\n### Properties conflicts ignored:"
        #            print "%s %s, in revision: %s\n" % (w, p, svn_rev)
        #    out = run_svn(["propget", "svn:ignore",
        #                   original_wc + os.sep + p])
        #    if out:
        #        run_svn(["propset", "svn:ignore", out.strip(), p])
        #    out = run_svn(["propget", "svn:externel",
        #                   original_wc + os.sep + p])
        #    if out:
        #        run_svn(["propset", "svn:external", out.strip(), p])
        ## try again
        #if has_Conflict:
        #    commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author)
        #else:
        raise ExternalCommandFailed
    print "(Finished source rev #"+str(svn_rev)+")"


def main():
    usage = "Usage: %prog [-a] [-c] [-r SVN rev] <Source SVN URL> <Target SVN URL>"
    parser = OptionParser(usage)
    parser.add_option("-a", "--keep-author", action="store_true",
                      dest="keep_author", help="Keep revision Author or not")
    parser.add_option("-c", "--continue-from-break", action="store_true",
                      dest="cont_from_break",
                      help="Continue from previous break")
    parser.add_option("-r", "--svn-rev", type="int", dest="svn_rev",
                      help="SVN revision to checkout from")
    (options, args) = parser.parse_args()
    if len(args) != 2:
        display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
                      False)

    source_url = args.pop(0).rstrip("/")
    target_url = args.pop(0).rstrip("/")
    if options.keep_author:
        keep_author = True
    else:
        keep_author = False

    # Find the greatest_rev in the source repo
    svn_info = get_svn_info(source_url)
    greatest_rev = svn_info['revision']

    dup_wc = "_dup_wc"

    # If the old working copy does not exist, disable continue mode
    # TODO: Better continue support. Maybe include source repo's rev # in target commit info?
    if not os.path.exists(dup_wc):
        options.cont_from_break = False

    if not options.cont_from_break:
        # Warn if the target SVN URL already exists
        cmd = find_program("svn")
        pipe = Popen([cmd] + ["list"] + [target_url], executable=cmd,
                     stdout=PIPE, stderr=PIPE)
        out, err = pipe.communicate()
        if pipe.returncode == 0:
            print "Target SVN URL: %s already exists!" % target_url
            if out:
                print out
            print "Press 'Enter' to Continue, 'Ctrl + C' to Cancel..."
            print "(Timeout in 5 seconds)"
            rfds, wfds, efds = select.select([sys.stdin], [], [], 5)

        # Get log entry for the SVN revision we will check out
        if options.svn_rev:
            # If a rev was specified, get the log entry just before or at that rev
            svn_start_log = get_last_svn_log_entry(source_url, 1, options.svn_rev)
        else:
            # Otherwise, get the log entry of the branch creation
            # TODO: This call is *very* expensive on a repo with lots of revisions.
            #       Even though the call is passing --limit 1, it seems like that limit-filter
            #       is happening after SVN has fetched the full log history.
            svn_start_log = get_first_svn_log_entry(source_url, 1, greatest_rev)

        # This is the revision we will start from for source_url
        svn_rev = svn_start_log['revision']

        # Check out a working copy of target_url
        dup_wc = os.path.abspath(dup_wc)
        if os.path.exists(dup_wc):
            shutil.rmtree(dup_wc)
        svn_checkout(target_url, dup_wc)
        os.chdir(dup_wc)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        paths = run_svn(["list", "-r", str(svn_rev), source_url+"@"+str(svn_rev)])
        paths = paths.strip("\n").split("\n")
        for path in paths:
            if not path:
                # Skip empty lines
                continue
            # Directories have a trailing slash in the "svn list" output
            if path[-1] == "/":
                path = path.rstrip('/')
                if not os.path.exists(path):
                    os.makedirs(path)
            run_svn(["export", "--force", "-r", str(svn_rev), source_url+"/"+path+"@"+str(svn_rev), path])
            run_svn(["add", path])
        commit_from_svn_log_entry(svn_start_log, [], keep_author)
    else:
        dup_wc = os.path.abspath(dup_wc)
        os.chdir(dup_wc)

    # Get SVN info
    svn_info = get_svn_info(source_url)
    # Get the base URL for the source repos, e.g. u'svn://svn.example.com/svn/repo'
    source_repos_url = svn_info['repos_url']

    if options.cont_from_break:
        svn_rev = svn_info['revision'] - 1
        if svn_rev < 1:
            svn_rev = 1

    # Load the SVN log starting from svn_rev + 1
    it_log_entries = iter_svn_log_entries(source_url, svn_rev + 1, greatest_rev)

    try:
        for log_entry in it_log_entries:
            pull_svn_rev(log_entry, source_repos_url, source_url, target_url, keep_author)

    except KeyboardInterrupt:
        print "\nStopped by user."
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
    except:
        print "\nCommand failed with the following error:\n"
        traceback.print_exc()
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
    finally:
        run_svn(["up"])
        print "\nFinished!"


if __name__ == "__main__":
    main()

# vim:sts=4:sw=4: