]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn.py
* Debug parser option-group
[svn2svn.git] / svn2svn.py
1 #!/usr/bin/env python
2 """
3 svn2svn.py
4
5 Replicate (replay) changesets from one SVN repository to another:
6 * Maintains full logical history (e.g. uses "svn copy" for renames).
7 * Maintains original commit messages.
8 * Optionally maintain source author info. (Only supported if accessing
9 target SVN repo via file://)
10 * Cannot maintain original commit date, but appends original commit date
11 for each commit message: "Date: %d".
12 * Optionally run an external shell script before each replayed commit
13 to give the ability to dynamically exclude or modify files as part
14 of the replay.
15
16 License: GPLv2, the same as hgsvn.
17 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
18 (This is a forked and heavily modified version of http://code.google.com/p/svn2svn/)
19 """
20
21 import os
22 import sys
23 import time
24 import locale
25 import shutil
26 import select
27 import calendar
28 import traceback
29
30 from optparse import OptionParser,OptionGroup
31 from subprocess import Popen, PIPE
32 from datetime import datetime
33 from operator import itemgetter
34
35 try:
36 from xml.etree import cElementTree as ET
37 except ImportError:
38 try:
39 from xml.etree import ElementTree as ET
40 except ImportError:
41 try:
42 import cElementTree as ET
43 except ImportError:
44 from elementtree import ElementTree as ET
45
# Base argument lists for the "svn" sub-commands run by this script.
# Callers append their own options and targets to these.
svn_log_args = ['log', '--xml']
svn_info_args = ['info', '--xml']
svn_checkout_args = ['checkout', '-q']
svn_status_args = ['status', '--xml', '-v', '--ignore-externals']

# Debug switches
debug = False
runsvn_timing = False   # Display how long each "svn" OS command took to run?

# Verbosity switches
runsvn_showcmd = False  # Display every "svn" OS command we run?
runsvn_showout = False  # Display the stdout results from every "svn" OS command we run?
svnlog_verbose = False  # Display each action + changed-path as we walk the history?
58
59 # define exception class
class ExternalCommandFailed(RuntimeError):
    """
    Raised when an external command failed.
    """

def display_error(message, raise_exception = True):
    """
    Display an error message, then terminate.

    'message' is the text printed after the "Error:" prefix.
    'raise_exception' selects how to terminate: when true (the default),
    raise ExternalCommandFailed carrying 'message'; otherwise exit the
    process with status 1.
    """
    print("Error: %s" % (message,))
    print("")
    if raise_exception:
        # Attach the message so that handlers and tracebacks show what failed.
        raise ExternalCommandFailed(message)
    sys.exit(1)
75
# Windows compatibility code by Bill Baxter
if os.name == "nt":
    def find_program(name):
        """
        Resolve 'name' to a complete file name for Popen.
        Windows is finicky about having the complete file name, and Popen
        won't search the %PATH% for you automatically, so probe every
        %PATH% directory for the program. Returns the first existing
        candidate, or None when nothing matches.
        (Adapted from ctypes.find_library)
        """
        # See MSDN for the REAL search order.
        stem, suffix = os.path.splitext(name)
        # An explicit extension is used as-is; otherwise try the usual ones.
        candidates = [suffix] if suffix else ['.bat', '.exe']
        for folder in os.environ['PATH'].split(os.pathsep):
            for candidate in candidates:
                full_name = os.path.join(folder, stem + candidate)
                if os.path.exists(full_name):
                    return full_name
        return None
else:
    def find_program(name):
        """
        Resolve 'name' to a program name for Popen.
        On Unix, popen isn't picky about having absolute paths, so the
        name is returned unchanged.
        """
        return name
104
def shell_quote(s):
    """
    Quote 's' so it can be shown as one word of a shell command line.

    When 'runsvn_showcmd' is on, a string made only of letters, digits,
    '=' and '-' is returned unquoted. Anything else is wrapped in double
    quotes on Windows, or single quotes elsewhere, after doubling every
    backslash and replacing every single quote with '"'"'.
    """
    if runsvn_showcmd:
        import re
        if re.compile('^[A-Za-z0-9=-]+$').match(s):
            return s
    quote_char = '"' if os.name == "nt" else "'"
    escaped = s.replace('\\', '\\\\').replace("'", "'\"'\"'")
    return quote_char + escaped + quote_char
116
locale_encoding = locale.getpreferredencoding()

def run_svn(args, fail_if_stderr=False, ignore_retcode_err=False, encoding="utf-8"):
    """
    Run the "svn" command with 'args' through a pipe and return its stdout.

    'args' is the list of arguments; unicode values are encoded with
    'encoding' (or the locale's preferred encoding when 'encoding' is
    empty) and other non-string values are passed through str().
    'fail_if_stderr' treats any non-blank stderr output as a failure.
    'ignore_retcode_err' tolerates a non-zero return code.
    On failure, display_error() is called with the return code, the
    command line and the stderr text.
    """
    def _as_str(arg):
        if isinstance(arg, unicode):
            return arg.encode(encoding or locale_encoding)
        if isinstance(arg, str):
            return arg
        return str(arg)
    svn_args = [_as_str(arg) for arg in args]

    svn_exe = find_program("svn")
    cmd_string = str(" ".join([shell_quote(word) for word in [svn_exe] + svn_args]))
    if runsvn_showcmd:
        # For status-only commands (or commands that aren't important to highlight), show in dim-blue.
        status_cmds = ['status', 'st', 'log', 'info', 'list', 'propset', 'update', 'up', 'cleanup', 'revert']
        # Everything else takes action on the working-copy: show in bright-blue.
        color = "34" if args[0] in status_cmds else "94"
        print("\x1b[34m"+"$"+"\x1b["+color+"m" + " " + cmd_string + "\x1b[0m")
    if runsvn_timing:
        started = time.time()
    proc = Popen([svn_exe] + svn_args, executable=svn_exe, stdout=PIPE, stderr=PIPE)
    out, err = proc.communicate()
    if runsvn_timing:
        finished = time.time()
        print("(" + str(round(finished-started,4)) + " elapsed)")
    if out and runsvn_showout:
        print(out)
    bad_exit = proc.returncode != 0 and not ignore_retcode_err
    stderr_failure = fail_if_stderr and err.strip()
    if bad_exit or stderr_failure:
        display_error("External program failed (return code %d): %s\n%s"
            % (proc.returncode, cmd_string, err))
    return out
155
def svn_date_to_timestamp(svn_date):
    """
    Parse an SVN date as read from the XML output (for example
    "2011-01-02T03:04:05.123456Z") and return the corresponding timestamp.

    The date is interpreted as UTC and fractional seconds are discarded.
    A date with no fractional part ("2011-01-02T03:04:05Z") is accepted too.
    """
    # Keep only the "YYYY-MM-DDTHH:MM:SS" part: drop the microseconds and,
    # when there were none, the trailing "Z" timezone marker (always UTC,
    # hopefully).
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    date = svn_date.strip().split('.', 1)[0].rstrip('Z')
    time_tuple = time.strptime(date, "%Y-%m-%dT%H:%M:%S")
    return calendar.timegm(time_tuple)
167
def parse_svn_info_xml(xml_string):
    """
    Parse the XML output from an "svn info" command and extract
    useful information as a dict.

    The dict has the keys 'url', 'revision', 'repos_url', 'repos_uuid',
    'last_changed_rev' and 'kind'. It is empty when the XML contains no
    <entry> element.
    """
    d = {}
    tree = ET.fromstring(xml_string)
    entry = tree.find('.//entry')
    # Compare against None explicitly: the truth value of an Element
    # reflects whether it has children, not whether find() matched.
    if entry is not None:
        d['url'] = entry.find('url').text
        d['revision'] = int(entry.get('revision'))
        d['repos_url'] = tree.find('.//repository/root').text
        d['repos_uuid'] = tree.find('.//repository/uuid').text
        d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
        d['kind'] = entry.get('kind')
    return d
184
def parse_svn_log_xml(xml_string):
    """
    Parse the XML output from an "svn log" command and return a list of
    dicts, one per <logentry>, with the keys 'revision', 'author', 'date',
    'message', 'revprops' and 'changed_paths'.
    """
    log_entries = []
    root = ET.fromstring(xml_string)
    for logentry in root.findall('logentry'):
        info = {}
        info['revision'] = int(logentry.get('revision'))
        # Some revisions don't have authors, most notably
        # the first revision in a repository.
        author_node = logentry.find('author')
        if author_node is not None:
            info['author'] = author_node.text or None
        else:
            info['author'] = None
        info['date'] = svn_date_to_timestamp(logentry.find('date').text)
        # Some revisions may have empty commit message
        msg_node = logentry.find('msg')
        if msg_node is not None and msg_node.text is not None:
            message = msg_node.text.strip() or ""
        else:
            message = ""
        # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
        message = message.replace('\r\n', '\n').replace('\n\r', '\n')
        info['message'] = message.replace('\r', '\n')
        info['revprops'] = [{ 'name': prop.get('name'), 'value': prop.text }
                            for prop in logentry.findall('.//revprops/property')]
        changed = []
        for node in logentry.findall('.//paths/path'):
            raw_rev = node.get('copyfrom-rev')
            changed.append({
                'path': node.text,
                'kind': node.get('kind'),
                'action': node.get('action'),
                'copyfrom_path': node.get('copyfrom-path'),
                # Only a present (non-empty) copy-from revision is converted.
                'copyfrom_revision': int(raw_rev) if raw_rev else raw_rev,
            })
        # Need to sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
        # can process actions in depth-first order.
        changed.sort(key=itemgetter('path'))
        info['changed_paths'] = changed
        log_entries.append(info)
    return log_entries
228
def parse_svn_status_xml(xml_string, base_dir=None):
    """
    Parse the XML output from an "svn status" command and return a list
    of dicts (one per status entry) with the keys 'path' and 'type'.

    When 'base_dir' is given, every entry path must start with it; that
    prefix and any leading path separators are removed from 'path'.
    'type' is 'external', 'deleted', 'normal' (the entry has a revision)
    or 'unversioned'.
    """
    results = []
    for entry in ET.fromstring(xml_string).findall('.//entry'):
        path = entry.get('path')
        if base_dir is not None:
            assert path.startswith(base_dir)
            path = path[len(base_dir):].lstrip('/\\')
        wc_status = entry.find('wc-status')
        item = wc_status.get('item')
        if item in ('external', 'deleted'):
            entry_type = item
        elif wc_status.get('revision') is not None:
            entry_type = 'normal'
        else:
            entry_type = 'unversioned'
        results.append({'path': path, 'type': entry_type})
    return results
254
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Run "svn info" on the given URL or working copy and return the dict
    built by parse_svn_info_xml().
    When 'rev_number' is given, it is appended to the target as "@<rev>".
    """
    target = svn_url_or_wc
    if rev_number is not None:
        target = target + "@" + str(rev_number)
    info_xml = run_svn(svn_info_args + [target], fail_if_stderr=True)
    return parse_svn_info_xml(info_xml)
267
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Check out 'svn_url' into 'checkout_dir', at revision 'rev_number'
    when one is given. Returns the output of the "svn checkout" command.
    """
    rev_args = ['-r', rev_number] if rev_number is not None else []
    return run_svn(svn_checkout_args + rev_args + [svn_url, checkout_dir])
277
def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Fetch up to 'limit' SVN log entries between the given revisions and
    return them as parsed by parse_svn_log_xml().

    Unless one of the revisions is 'HEAD', the range is passed with "-r"
    and, if the target has no "@", the larger revision is appended to the
    target as "@<rev>".
    """
    options = []
    switches = (
        (stop_on_copy, '--stop-on-copy'),
        (get_changed_paths, '-v'),
        (get_revprops, '--with-all-revprops'),
    )
    for enabled, flag in switches:
        if enabled:
            options.append(flag)
    target = str(svn_url_or_wc)
    if rev_start != 'HEAD' and rev_end != 'HEAD':
        options.extend(['-r', '%s:%s' % (rev_start, rev_end)])
        if "@" not in svn_url_or_wc:
            target += "@" + str(max(rev_start, rev_end))
    options.extend(['--limit', str(limit), target])
    log_xml = run_svn(svn_log_args + options)
    return parse_svn_log_xml(log_xml)
297
def get_svn_status(svn_wc, flags=None):
    """
    Run "svn status" on the given working copy and return the entries as
    parsed by parse_svn_status_xml(), with paths relative to the working copy.
    'flags', when given, is passed as one extra command-line argument.
    """
    # Canonicalize the path so that the prefix-stripping in the parser works
    wc_path = os.path.abspath(svn_wc)
    extra = [flags] if flags else []
    status_xml = run_svn(svn_status_args + extra + [wc_path])
    return parse_svn_status_xml(status_xml, wc_path)
310
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Return the first SVN log entry in the requested revision range.
    Reports an error through display_error() when the range has no entries.
    """
    # A limit of 1 asks "svn log" for just the first entry in the range
    found = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths, get_revprops)
    if not found:
        no_log_msg = "No SVN log for %s between revisions %s and %s" % (svn_url, rev_start, rev_end)
        display_error(no_log_msg)

    return found[0]
321
def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the first log entry in the range rev_start:rev_end of an SVN branch,
    without crossing copies (the log is fetched with --stop-on-copy).
    Starting the range at the beginning of history gives the log entry
    corresponding to the branch creation.

    'get_changed_paths' controls whether the changed paths are fetched
    with the log entry (passed on to get_one_svn_log_entry()).

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
333
def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the last log entry in the range rev_start:rev_end of an SVN branch,
    without crossing copies (the log is fetched with --stop-on-copy).
    The range is queried in reverse (rev_end first), so the first entry
    returned by "svn log" is the latest one in the range.

    'get_changed_paths' controls whether the changed paths are fetched
    with the log entry (passed on to get_one_svn_log_entry()).
    """
    return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
341
342
log_duration_threshold = 10.0
log_min_chunk_length = 10

def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Yield the SVN log entries between first_rev and last_rev, in order.

    The log is fetched in chunks so that it isn't too nasty to the SVN
    server if many entries are requested. The chunk length is adapted to
    how long each request took.
    """
    next_rev = first_rev
    batch_size = log_min_chunk_length
    span_factor = 1.0
    while last_rev == "HEAD" or next_rev <= last_rev:
        started = time.time()
        stop_rev = min(last_rev, next_rev + int(batch_size * span_factor))
        batch = run_svn_log(svn_url, next_rev, stop_rev, batch_size, stop_on_copy, get_changed_paths, get_revprops)
        elapsed = time.time() - started
        if not batch:
            # Nothing in this revision window: stop at the end of the range,
            # otherwise move past the window and widen the next one.
            if stop_rev == last_rev:
                break
            next_rev = stop_rev + 1
            span_factor *= 2.0
            continue
        for log_entry in batch:
            yield log_entry
            next_rev = log_entry['revision'] + 1
        # Adapt chunk length based on measured request duration
        if elapsed < log_duration_threshold:
            batch_size = int(batch_size * 2.0)
        elif elapsed > log_duration_threshold * 2:
            batch_size = max(log_min_chunk_length, int(batch_size / 2.0))
375
def commit_from_svn_log_entry(entry, files=None, keep_author=False, revprops=None):
    """
    Given an SVN log entry and an optional sequence of files, do an svn commit.

    'entry' is a log-entry dict as built by parse_svn_log_xml().
    'files' optionally restricts the commit to the given paths.
    'keep_author' commits with the source author as "--username"; otherwise
    the author is appended to the commit message as an "Author:" line.
    'revprops' is an optional list of {'name', 'value'} dicts to set on the
    new revision through "--with-revprop".
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # This will use the local timezone for displaying commit times
    timestamp = int(entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    # Uncomment this one one if you prefer UTC commit times
    #svn_date = "%d 0" % timestamp
    message = entry['message'] + "\nDate: " + svn_date
    # Some revisions have no author (parse_svn_log_xml() stores None);
    # in that case leave the author information out.
    author = entry['author']
    options = ["ci", "--force-log", "-m"]
    if keep_author:
        options.append(message)
        if author:
            options += ["--username", author]
    else:
        if author:
            message += "\nAuthor: " + author
        options.append(message)
    for r in revprops or []:
        # An empty property is parsed as None; pass it on as an empty value.
        value = r['value']
        if value is None:
            value = ""
        options += ["--with-revprop", r['name']+"="+str(value)]
    if files:
        options += list(files)
    print("(Committing source rev #"+str(entry['revision'])+"...)")
    run_svn(options)
397
def in_svn(p):
    """
    Tell whether the given file/folder is tracked by Subversion.
    Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
    With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of the working-copy.
    So ask "svn status" instead: the path counts as tracked when its first
    status entry has type 'normal'.
    """
    # TODO: Is there a better way to do this?
    status_entries = get_svn_status(p)
    return bool(status_entries) and status_entries[0]['type'] == 'normal'
411
def find_svn_ancestors(source_repos_url, source_url, path_offset, source_rev, \
                       copyfrom_path, copyfrom_rev, prefix = ""):
    """
    Given a final svn-add'd path (source_base+"/"+path_offset) and the origin copy-from
    path (copyfrom_path), walk the SVN history backwards to inspect the ancestry of
    that path. Build a collection of copyfrom_path+revision pairs for each of the
    branch-copies since the initial branch-creation. If we find a copyfrom_path which
    source_url is a substring match of (e.g. we crawled back to the initial branch-
    copy from trunk), then return the collection of ancestor paths. Otherwise,
    copyfrom_path has no ancestry compared to source_url.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'source_repos_url' is the full URL to the root of the source repository,
        e.g. 'file:///path/to/repo'
    'source_url' is the full URL to the source path in the source repository.
    'path_offset' is the offset from source_base to the file to check ancestry for,
        e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'copyfrom_path' is copy-from path, e.g. '/branches/bug123/projectA/file1.txt'
    'copyfrom_rev' is revision this copy-from path was copied at.
    'prefix' is prepended to every debug line that is printed.

    Returns a list of {'path', 'revision'} dicts, one per copy that was followed.
    The list is empty when the walk hit a replace, a delete or a plain
    (non-copy) add.
    """
    done = False
    source_base = source_url[len(source_repos_url):]
    working_path = copyfrom_path
    working_rev = copyfrom_rev
    ancestors_temp = [{'path': source_base+"/"+path_offset, 'revision': source_rev,
                       'copyfrom_path': copyfrom_path, 'copyfrom_rev': copyfrom_rev}]
    while not done:
        # Get the first "svn log" entry for this path (relative to @rev)
        if debug:
            print(prefix+"\x1b[33m" + ">> find_svn_ancestors: " + source_repos_url + working_path+"@"+str(working_rev) + "\x1b[0m")
        log_entry = get_first_svn_log_entry(source_repos_url + working_path+"@"+str(working_rev), 1, str(working_rev), True)
        if not log_entry:
            done = True
            break
        # Search for any actions on our target path (or parent paths).
        # NOTE(review): this is a substring match, not a path-prefix match.
        changed_paths_temp = []
        for d in log_entry['changed_paths']:
            path = d['path']
            if path in working_path:
                changed_paths_temp.append({'path': path, 'data': d})
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the chain and this path has no ancestry back to source_url.
            done = True
            continue
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=itemgetter('path'), reverse=True)
        # Find the action for our working_path in this revision
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            action = d['action']
            if action not in ('M', 'A', 'R', 'D'):
                display_error("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                    % (log_entry['revision'], action))
            if debug:
                debug_desc = "> " + action + " " + path
                if d['copyfrom_path']:
                    debug_desc += " (from " + d['copyfrom_path']+"@"+str(d['copyfrom_revision']) + ")"
                print(prefix+"\x1b[33m" + debug_desc + "\x1b[0m")

            if action == 'R':
                # If file/folder was replaced, it has no ancestor
                ancestors_temp = []
                done = True
                break
            if action == 'D':
                # If file/folder was deleted, it has no ancestor
                ancestors_temp = []
                done = True
                break
            if action == 'A':
                # If file/folder was added but not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    ancestors_temp = []
                    done = True
                    break
                # Else, file/folder was added and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                if debug:
                    print(prefix+"\x1b[33m" + ">> find_svn_ancestors: Found copy-from: " + \
                          path + " --> " + d['copyfrom_path']+"@"+str(d['copyfrom_revision']) + "\x1b[0m")
                ancestors_temp.append({'path': path, 'revision': log_entry['revision'],
                                       'copyfrom_path': d['copyfrom_path'], 'copyfrom_rev': d['copyfrom_revision']})
                working_path = working_path.replace(d['path'], d['copyfrom_path'])
                working_rev = d['copyfrom_revision']
                # If we found a copy-from case which matches our source_base, we're done
                if source_base in working_path:
                    done = True
                    break
                # Else, follow the copy and keep on searching
                break
        else:
            # Every matching path was only modified ('M'): working_path and
            # working_rev are unchanged, so another pass would fetch the same
            # log entry again and loop forever. Stop the walk here.
            done = True
    ancestors = []
    if ancestors_temp:
        # Re-apply each copy in turn to translate the final path back through history
        working_path = source_base+"/"+path_offset
        for d in ancestors_temp:
            working_path = working_path.replace(d['path'], d['copyfrom_path'])
            working_rev = d['copyfrom_rev']
            ancestors.append({'path': working_path, 'revision': working_rev})
        if debug:
            max_len = 0
            for d in ancestors:
                max_len = max(max_len, len(d['path']+"@"+str(d['revision'])))
            print(prefix+"\x1b[93m" + ">> find_svn_ancestors: Found parent ancestors: " + "\x1b[0m")
            for idx in range(len(ancestors)-1):
                d = ancestors[idx]
                d_next = ancestors[idx+1]
                print(prefix+"\x1b[33m" + " ["+str(idx)+"] " + str(d['path']+"@"+str(d['revision'])).ljust(max_len) + \
                      " <-- " + str(d_next['path']+"@"+str(d_next['revision'])).ljust(max_len) + "\x1b[0m")
    return ancestors
527
def get_rev_map(rev_map, src_rev, prefix):
    """
    Find the equivalent rev # in the target repo for the given rev # from the source repo.

    'rev_map' is the dictionary mapping source-repo rev #'s to target-repo rev #'s.
    'src_rev' is the source-repo rev # to look up.
    'prefix' is prepended to every line that is printed.

    Returns the entry for the highest mapped revision that is less than or
    equal to src_rev. Reports an error through display_error() when there
    is none.
    """
    # NOTE(review): unlike the other ">>" trace lines, this one is not
    # guarded by 'debug' -- confirm whether that is intended.
    print(prefix + "\x1b[32m" + ">> get_rev_map("+str(src_rev)+")" + "\x1b[0m")
    # Find the highest entry less-than-or-equal-to src_rev
    for rev in range(src_rev, 0, -1):
        if debug:
            print(prefix + "\x1b[32m" + ">> get_rev_map: rev="+str(rev)+" in_rev_map="+str(rev in rev_map) + "\x1b[0m")
        if rev in rev_map:
            return rev_map[rev]
    # Else, we fell off the bottom of the rev_map. Ruh-roh...
    # (src_rev is a number, so it must be converted before concatenating.)
    display_error("Internal Error: get_rev_map: Unable to find match rev_map entry for src_rev=" + str(src_rev))
541
def get_svn_dirlist(svn_path, svn_rev = ""):
    """
    Get a list of all the child contents (recursive) of the given folder path,
    as printed by "svn list --recursive". When 'svn_rev' is given, it is
    used both as "-r" and as "@<rev>" on the target.
    A failing "svn list" is tolerated; empty output gives an empty list.
    """
    target = svn_path
    cmd = ["list", "--recursive"]
    if svn_rev:
        cmd.extend(["-r", str(svn_rev)])
        target = target + "@" + str(svn_rev)
    cmd.append(target)
    # fail_if_stderr=False, ignore_retcode_err=True
    listing = run_svn(cmd, False, True)
    if len(listing) > 1:
        return listing.strip("\n").split("\n")
    return []
555
def replay_svn_copyfrom(source_repos_url, source_url, path_offset, target_url, source_rev, \
                        copyfrom_path, copyfrom_rev, rev_map, is_dir = False, prefix = ""):
    """
    Given a source path and its copy-from origin info, replay the necessary
    "svn copy" and "svn rm" commands to correctly track renames across copy-from's.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2   " Rename folder
    4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt   " Rename file inside renamed folder
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'path_offset' is the offset from source_base to the file to check ancestry for,
        e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'target_url' is the full URL to the target path in the target repository.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'copyfrom_path' is copy-from path, e.g. '/branches/bug123/projectA/file1.txt'
    'copyfrom_rev' is revision this copy-from path was copied at.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
        to the equivalent target-repo rev #'s.
    'is_dir' is whether path_offset is a directory (rather than a file).
    'prefix' is prepended to every debug line that is printed.
    """
    source_base = source_url[len(source_repos_url):]
    # Remember the copy-from info as given: the recursion below needs it
    # even after copyfrom_path/copyfrom_rev get replaced by an ancestor.
    srcfrom_path = copyfrom_path
    srcfrom_rev = copyfrom_rev
    if debug:
        print(prefix + "\x1b[32m" + ">> replay_svn_copyfrom: Check copy-from: " + source_base+" "+path_offset + " --> " + copyfrom_path+"@"+str(copyfrom_rev) + "\x1b[0m")
    if source_base in copyfrom_path:
        # The copy-from path is inside source_base, no need to check ancestry.
        if debug:
            print(prefix + "\x1b[32;1m" + ">> replay_svn_copyfrom: Check copy-from: Found copy (in source_base): " + copyfrom_path+"@"+str(copyfrom_rev) + "\x1b[0m")
    else:
        # Check if the copy-from path has ancestors which chain back to the current source_base
        ancestors = find_svn_ancestors(source_repos_url, source_url, path_offset, source_rev,
                                       copyfrom_path, copyfrom_rev, prefix+" ")
        if ancestors:
            # The copy-from path has ancestry back to source_url.
            # ancestors[n] is the original (pre-branch-copy) trunk path.
            # ancestors[n-1] is the first commit on the new branch.
            oldest = ancestors[-1]
            copyfrom_path = oldest['path']
            copyfrom_rev = oldest['revision']
            if debug:
                print(prefix + "\x1b[32;1m" + ">> replay_svn_copyfrom: Check copy-from: Found parent: " + copyfrom_path+"@"+str(copyfrom_rev) + "\x1b[0m")
    if source_base not in copyfrom_path:
        # If this copy-from path has no ancestry back to source_url, then can't do a "svn copy".
        # Create (parent) directory if needed
        if is_dir:
            parent_dir = path_offset
        else:
            parent_dir = os.path.dirname(path_offset).strip() or '.'
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir)
        # Export the entire added tree.
        export_from = source_repos_url + copyfrom_path+"@"+str(copyfrom_rev)
        run_svn(["export", "--force", "-r", str(copyfrom_rev), export_from, path_offset])
        if not in_svn(path_offset):
            run_svn(["add", "--parents", path_offset])
        # TODO: Need to copy SVN properties from source repos
    else:
        copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
        if debug:
            print(prefix + "\x1b[32m" + ">> replay_svn_copyfrom: svn_copy: Copy-from: " + copyfrom_path+"@"+str(copyfrom_rev) + "\x1b[0m")
        # Copy this path from the equivalent path+rev in the target repo, to create the
        # equivalent history.
        tgt_rev = get_rev_map(rev_map, copyfrom_rev, prefix+" ")
        if debug:
            print(prefix + "\x1b[32m" + ">> replay_svn_copyfrom: get_rev_map: " + str(copyfrom_rev) + " (source) -> " + str(tgt_rev) + " (target)" + "\x1b[0m")
        run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset])
        # Update the content in this fresh copy to match the final target revision.
        if is_dir:
            paths_local = get_svn_dirlist(path_offset)
            paths_remote = get_svn_dirlist(source_url+"/"+path_offset, source_rev)
            if debug:
                print(prefix + "\x1b[32m" + "paths_local: " + str(paths_local) + "\x1b[0m")
                print(prefix + "\x1b[32m" + "paths_remote: " + str(paths_remote) + "\x1b[0m")
            # Update files/folders which exist in remote but not local
            for path in paths_remote:
                if path not in paths_local:
                    # "svn list" marks folders with a trailing slash
                    path_is_dir = (path[-1] == "/")
                    replay_svn_copyfrom(source_repos_url, source_url, path_offset+"/"+path,
                                        target_url, source_rev,
                                        srcfrom_path+"/"+path, srcfrom_rev,
                                        rev_map, path_is_dir, prefix+" ")
            # Remove files/folders which exist in local but not remote
            for path in paths_local:
                if path not in paths_remote:
                    if svnlog_verbose:
                        print(" D " + source_base+"/"+path_offset+"/"+path)
                    run_svn(["remove", "--force", path_offset+"/"+path])
                    # TODO: Does this handle deleted folders too? Wouldn't want to have a case
                    # where we only delete all files from folder but leave orphaned folder around.
        else:
            export_from = source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev)
            run_svn(["export", "--force", "-r", str(source_rev), export_from, path_offset])
655
def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url, \
                          rev_map, removed_paths = None, commit_paths = None, prefix = ""):
    """
    Process SVN changes from the given log entry.
    Returns array of all the paths in the working-copy that were changed,
    i.e. the paths which need to be "svn commit".

    'log_entry' is the array structure built by parse_svn_log_xml().
    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
        to the equivalent target-repo rev #'s.
    'removed_paths' is the working list of deferred deletions. It is updated
        in place; a new list is used when omitted.
    'commit_paths' is the working list of specific paths which changes to pass
        to the final "svn commit". It is updated in place and returned; a
        new list is used when omitted.
    'prefix' is prepended to every debug/verbose line that is printed.
    """
    # Use fresh lists per call: a list literal as the default value would be
    # shared (and keep growing) across calls.
    if removed_paths is None:
        removed_paths = []
    if commit_paths is None:
        commit_paths = []
    # Get the relative offset of source_url based on source_repos_url
    # e.g. '/branches/bug123'
    source_base = source_url[len(source_repos_url):]
    source_rev = log_entry['revision']
    if debug:
        print(prefix + "\x1b[32m" + ">> process_svn_log_entry: " + source_url+"@"+str(source_rev) + "\x1b[0m")
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not path.startswith(source_base + "/"):
            # Ignore changed files that are not part of this subdir
            if path != source_base:
                if debug:
                    print(prefix + "\x1b[90m" + ">> process_svn_log_entry: Unrelated path: " + path + " (" + source_base + ")" + "\x1b[0m")
                continue
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path
        action = d['action']
        if action not in ('M', 'A', 'R', 'D'):
            display_error("In SVN rev. %d: action '%s' not supported. Please report a bug!"
                % (source_rev, action))

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        # But, if we gather too many paths to commit, then the caller wipes
        # commit_paths and ends up doing a commit at the root of the working-copy.
        if len(commit_paths) < 100:
            commit_paths.append(path_offset)

        # Special-handling for replace's
        if action == 'R':
            if svnlog_verbose:
                msg = " " + action + " " + d['path']
                if d['copyfrom_path']:
                    msg += " (from " + d['copyfrom_path']+"@"+str(d['copyfrom_revision']) + ")"
                print(prefix + msg)
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            if svnlog_verbose:
                msg = " " + action + " " + d['path']
                if d['copyfrom_path']:
                    msg += " (from " + d['copyfrom_path']+"@"+str(d['copyfrom_revision']) + ")"
                print(prefix + msg)
            # If we have any queued deletions for this same path, remove those if we're re-adding this path.
            if path_offset in removed_paths:
                removed_paths.remove(path_offset)
            # Determine where to export from.
            copyfrom_path = path
            copyfrom_rev = source_rev
            path_is_dir = (d['kind'] == 'dir')
            # Handle cases where this "add" was a copy from another URL in the source repos
            if d['copyfrom_revision']:
                copyfrom_path = d['copyfrom_path']
                copyfrom_rev = d['copyfrom_revision']
                replay_svn_copyfrom(source_repos_url, source_url, path_offset, target_url, source_rev,
                                    copyfrom_path, copyfrom_rev, rev_map, path_is_dir, prefix+" ")
            # Else just "svn export" the files from the source repo and "svn add" them.
            else:
                # Create (parent) directory if needed
                if path_is_dir:
                    p_path = path_offset
                else:
                    p_path = os.path.dirname(path_offset).strip() or '.'
                if not os.path.exists(p_path):
                    os.makedirs(p_path)
                # Export the entire added tree.
                run_svn(["export", "--force", "-r", str(copyfrom_rev),
                         source_repos_url + copyfrom_path+"@"+str(copyfrom_rev), path_offset])
                if not in_svn(path_offset):
                    run_svn(["add", "--parents", path_offset])
                # TODO: Need to copy SVN properties from source repos

        elif action == 'D':
            # Queue "svn remove" commands, to allow the action == 'A' handling the opportunity
            # to do smart "svn copy" handling on copy/move/renames.
            if path_offset not in removed_paths:
                removed_paths.append(path_offset)

        elif action == 'M':
            if svnlog_verbose:
                print(prefix + " " + action + " " + d['path'])
            # TODO: Is "svn merge -c" correct here? Should this just be an "svn export" plus
            # proplist updating?
            run_svn(["merge", "-c", str(source_rev), "--non-recursive",
                     "--non-interactive", "--accept=theirs-full",
                     source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

        else:
            display_error("Internal Error: process_svn_log_entry: Unhandled 'action' value: '" + action + "'")

    return commit_paths
775
def disp_svn_log_summary(log_entry):
    """
    Print a "svn log"-style banner for the source revision about to be replayed.

    Reads the 'revision', 'author', 'date' and 'message' keys of log_entry.
    'date' is converted with int() and datetime.fromtimestamp(), so the
    timestamp is shown in local time.
    """
    rev_label = str(log_entry['revision'])
    print("\n(Starting source rev #" + rev_label + ":)")
    # Header line: "r<rev> | <author> | <YYYY-MM-DD HH:MM:SS>"
    commit_time = datetime.fromtimestamp(int(log_entry['date']))
    header_fields = ["r" + rev_label,
                     log_entry['author'],
                     str(commit_time.isoformat(' '))]
    print(" | ".join(header_fields))
    print(log_entry['message'])
    print("------------------------------------------------------------------------")
783
def pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url, target_url, rev_map, keep_author=False):
    """
    Replay one source log entry into the target working copy and commit it.

    The changed paths are applied through process_svn_log_entry(), the
    deletions it queued are then run as "svn remove", and the result is
    committed with source_uuid / source_url / source_rev revprops attached.
    Nothing is returned and no rollback is done here; exceptions propagate
    to the caller.
    """
    disp_svn_log_summary(log_entry)
    source_rev = log_entry['revision']

    # Replay every changed path. Deletions are only queued (in
    # pending_removals) so that adds can be handled before the removes run.
    pending_removals = []
    commit_paths = []
    process_svn_log_entry(log_entry, source_repos_url, source_url, target_url,
                          rev_map, pending_removals, commit_paths)

    # Now run the deferred "svn remove" commands.
    display_base = source_url[len(source_repos_url):] + "/"
    for doomed_path in pending_removals:
        if svnlog_verbose:
            print(" D " + display_base + doomed_path)
        run_svn(["remove", "--force", doomed_path])

    # With too many individual paths, pass an empty list instead so the
    # commit happens at the root of the working copy.
    too_many_paths = len(commit_paths) > 99
    paths_for_commit = [] if too_many_paths else commit_paths

    # Source-tracking revprops recorded on the target commit.
    revprops = [
        {'name': 'source_uuid', 'value': source_repos_uuid},
        {'name': 'source_url', 'value': source_url},
        {'name': 'source_rev', 'value': source_rev},
    ]
    commit_from_svn_log_entry(log_entry, paths_for_commit,
                              keep_author=keep_author, revprops=revprops)
    print("(Finished source rev #" + str(source_rev) + ")")
817
def main():
    """
    Command-line entry point: replay the history of source_url into target_url.

    Parses the options, looks up the source repository's HEAD revision, root
    URL and UUID, and then either:
      * fresh run: warns if target_url already exists, checks out target_url
        into a clean "_dup_wc" working copy and commits an initial import of
        the source tree at the starting revision; or
      * continue mode (-c, only honoured when "_dup_wc" already exists):
        re-enters the existing working copy and resumes after the revision
        given with -r.
    Every later source revision up to HEAD is then replayed through
    pull_svn_rev(), with rev_map recording source rev # -> target rev #.
    """
    global debug, runsvn_showcmd, svnlog_verbose

    usage = "Usage: %prog [-a] [-c] [-r SVN rev] source_url target_url"
    parser = OptionParser(usage)
    parser.add_option("-r", "--revision", type="int", dest="svn_rev", metavar="REV",
                      help="initial SVN revision to checkout from")
    parser.add_option("-a", "--keep-author", action="store_true", dest="keep_author",
                      help="maintain original Author info from source repo")
    parser.add_option("-c", "--continue", action="store_true", dest="cont_from_break",
                      help="continue from previous break")
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                      help="show 'svn status'-style messages for each action replayed [default]")
    parser.add_option("-q", "--quiet", action="store_false", dest="verbose",
                      help="show only minimal status/progress messages")
    parser.set_defaults(verbose=True)
    group = OptionGroup(parser, "Debug Options")
    group.add_option("--debug", action="store_true", dest="debug_all",
                     help="enable all debugging options")
    group.add_option("--debug-showcmds", action="store_true", dest="debug_showcmds",
                     help="display each SVN command being executed")
    group.add_option("--debug-debugmsgs", action="store_true", dest="debug_debugmsgs",
                     help="display debug messages")
    parser.add_option_group(group)
    (options, args) = parser.parse_args()
    if len(args) != 2:
        display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
                      False)

    # Apply the debug/verbosity switches before running any svn command, so
    # that they also cover the very first "svn info" call below.
    if options.debug_debugmsgs or options.debug_all:
        debug = True
    if options.debug_showcmds or options.debug_all:
        runsvn_showcmd = True
    if options.verbose:
        svnlog_verbose = True

    source_url = args.pop(0).rstrip("/")
    target_url = args.pop(0).rstrip("/")
    keep_author = bool(options.keep_author)

    # Find the greatest_rev in the source repo
    svn_info = get_svn_info(source_url)
    greatest_rev = svn_info['revision']
    # Get the base URL for the source repos, e.g. 'svn://svn.example.com/svn/repo'
    source_repos_url = svn_info['repos_url']
    # Get the UUID for the source repos
    source_repos_uuid = svn_info['repos_uuid']

    dup_wc = "_dup_wc"
    rev_map = {}

    # if old working copy does not exist, disable continue mode
    # TODO: Better continue support. Maybe include source repo's rev # in target commit info?
    if not os.path.exists(dup_wc):
        options.cont_from_break = False

    if not options.cont_from_break:
        # Warn if Target SVN URL existed
        cmd = find_program("svn")
        pipe = Popen([cmd] + ["list"] + [target_url], executable=cmd,
                     stdout=PIPE, stderr=PIPE)
        out = pipe.communicate()[0]
        if pipe.returncode == 0:
            print("Target SVN URL: %s existed!" % target_url)
            if out:
                print(out)
            print("Press 'Enter' to Continue, 'Ctrl + C' to Cancel...")
            print("(Timeout in 5 seconds)")
            # Block until stdin becomes readable or 5 seconds elapse; the
            # result itself is not needed.
            select.select([sys.stdin], [], [], 5)

        # Get log entry for the SVN revision we will check out
        if options.svn_rev:
            # If specify a rev, get log entry just before or at rev
            svn_start_log = get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
        else:
            # Otherwise, get log entry of branch creation
            # TODO: This call is *very* expensive on a repo with lots of revisions.
            #       Even though the call is passing --limit 1, it seems like that limit-filter
            #       is happening after SVN has fetched the full log history.
            svn_start_log = get_first_svn_log_entry(source_url, 1, greatest_rev, False)

        # This is the revision we will start from for source_url
        source_rev = svn_start_log['revision']

        # Check out a working copy of target_url
        dup_wc = os.path.abspath(dup_wc)
        if os.path.exists(dup_wc):
            shutil.rmtree(dup_wc)
        svn_checkout(target_url, dup_wc)
        os.chdir(dup_wc)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        listing = run_svn(["list", "-r", str(source_rev), source_url+"@"+str(source_rev)])
        if len(listing) > 1:
            disp_svn_log_summary(get_one_svn_log_entry(source_url, source_rev, source_rev))
            print("(Initial import)")
            for path in listing.strip("\n").split("\n"):
                # For each top-level file/folder...
                if not path:
                    # Skip blank lines without abandoning the remaining entries
                    continue
                # Directories have a trailing slash in the "svn list" output
                if path.endswith("/"):
                    path = path.rstrip('/')
                    if not os.path.exists(path):
                        os.makedirs(path)
                run_svn(["export", "--force", "-r", str(source_rev),
                         source_url+"/"+path+"@"+str(source_rev), path])
                run_svn(["add", path])
            revprops = [{'name': 'source_uuid', 'value': source_repos_uuid},
                        {'name': 'source_url', 'value': source_url},
                        {'name': 'source_rev', 'value': source_rev}]
            commit_from_svn_log_entry(svn_start_log, [], keep_author=keep_author, revprops=revprops)
            print("(Finished source rev #"+str(source_rev)+")")
    else:
        dup_wc = os.path.abspath(dup_wc)
        os.chdir(dup_wc)
        # TODO: Need better resume support. For the time being, expect caller explicitly passes in resume revision.
        source_rev = options.svn_rev
        # -r is optional on the command line, so it may be None here; test for
        # that explicitly rather than relying on how None compares to an int.
        if source_rev is None or source_rev < 1:
            display_error("Invalid arguments\n\nNeed to pass result rev # (-r) when using continue-mode (-c)", False)

    # Load SVN log starting from source_rev + 1
    it_log_entries = iter_svn_log_entries(source_url, source_rev + 1, greatest_rev)

    try:
        for log_entry in it_log_entries:
            # Replay this revision from source_url into target_url
            pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url,
                         target_url, rev_map, keep_author)
            # Update our target working-copy, to ensure everything says it's at the new HEAD revision
            run_svn(["up", dup_wc])
            # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
            dup_info = get_svn_info(target_url)
            dup_rev = dup_info['revision']
            source_rev = log_entry['revision']
            rev_map[source_rev] = dup_rev

    except KeyboardInterrupt:
        print("\nStopped by user.")
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
    except:
        # Deliberately broad: whatever stopped the replay, report it and
        # discard the uncommitted changes in the working copy.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
    finally:
        run_svn(["up"])
        print("\nFinished!")
975
976
# Start the replay only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
979
980 # vim:sts=4:sw=4: