]> Tony Duckles's Git Repositories (git.nynim.org) - svn2svn.git/blob - svn2svn.py
More robust in_svn()
[svn2svn.git] / svn2svn.py
1 #!/usr/bin/env python
2 """
3 svn2svn.py
4
5 Replicate (replay) changesets from one SVN repository to another:
6 * Maintains full logical history (e.g. uses "svn copy" for renames).
7 * Maintains original commit messages.
8 * Optionally maintain source author info. (Only supported if accessing
9 target SVN repo via file://)
10 * Cannot maintain original commit date, but appends original commit date
11 for each commit message: "Date: %d".
12 * Optionally run an external shell script before each replayed commit
13 to give the ability to dynamically exclude or modify files as part
14 of the replay.
15
16 License: GPLv2, the same as hgsvn.
17 Author: Tony Duckles (https://github.com/tonyduckles/svn2svn)
18 (This is a forked and heavily modified version of http://code.google.com/p/svn2svn/)
19 """
20
21 import os
22 import sys
23 import time
24 import locale
25 import shutil
26 import select
27 import calendar
28 import traceback
29
30 from optparse import OptionParser,OptionGroup
31 from subprocess import Popen, PIPE
32 from datetime import datetime
33 from operator import itemgetter
34
35 try:
36 from xml.etree import cElementTree as ET
37 except ImportError:
38 try:
39 from xml.etree import ElementTree as ET
40 except ImportError:
41 try:
42 import cElementTree as ET
43 except ImportError:
44 from elementtree import ElementTree as ET
45
# Base argument lists for the "svn" sub-commands used below; the helper
# functions append their own options and targets to these before calling
# run_svn().
svn_log_args = ['log', '--xml']
svn_info_args = ['info', '--xml']
svn_checkout_args = ['checkout', '-q']
svn_status_args = ['status', '--xml', '-v', '--ignore-externals']

# Setup debug options
debug = False # Print the ">> ..." trace lines emitted by the history-walking helpers?
runsvn_timing = False # Display how long each "svn" OS command took to run?
# Setup verbosity options
runsvn_showcmd = False # Display every "svn" OS command we run?
runsvn_showout = False # Display the stdout results from every "svn" OS command we run?
svnlog_verbose = False # Display each action + changed-path as we walk the history?
58
59 # define exception class
class ExternalCommandFailed(RuntimeError):
    """
    Raised by display_error() to abort the current operation, e.g. after
    an external "svn" command reported a failure.
    """
64
def display_error(message, raise_exception = True):
    """
    Display an error message, then abort.

    'message' is the text printed after the "Error:" label.
    'raise_exception' selects how to abort: when true (the default), raise
    ExternalCommandFailed carrying 'message'; otherwise terminate the
    process with exit status 1.
    """
    print("Error: %s" % (message,))
    print("")
    if raise_exception:
        # Attach the message to the exception so that it is still available
        # to whoever catches or logs it, not only on stdout.
        raise ExternalCommandFailed(message)
    sys.exit(1)
75
76 # Windows compatibility code by Bill Baxter
if os.name != "nt":
    def find_program(name):
        """
        Find the name of the program for Popen.
        Outside of Windows, Popen isn't picky about having absolute paths,
        so 'name' is handed back unchanged.
        """
        return name
else:
    def find_program(name):
        """
        Find the name of the program for Popen.
        Windows is finicky about having the complete file name. Popen
        won't search the %PATH% for you automatically, so every directory
        listed in %PATH% is probed here.
        (Adapted from ctypes.find_library)

        Returns the first existing candidate, or None when there is none.
        """
        # See MSDN for the REAL search order.
        stem, suffix = os.path.splitext(name)
        # Without an explicit extension, try the usual executable ones.
        suffixes = [suffix] if suffix else ['.bat', '.exe']
        for folder in os.environ['PATH'].split(os.pathsep):
            for candidate in [os.path.join(folder, stem + s) for s in suffixes]:
                if os.path.exists(candidate):
                    return candidate
        return None
104
def shell_quote(s):
    """
    Wrap 's' in quotes so it can be shown as part of a command line.

    While runsvn_showcmd is enabled, strings made up only of letters,
    digits, '=' and '-' are returned unquoted to keep the echoed command
    readable. The quote character is '"' on Windows and "'" elsewhere.
    """
    if runsvn_showcmd:
        import re
        if re.compile('^[A-Za-z0-9=-]+$').match(s):
            return s
    quote_char = '"' if os.name == "nt" else "'"
    # Double the backslashes first, then break out of the quoting around
    # each embedded single quote.
    escaped = s.replace('\\', '\\\\')
    escaped = escaped.replace("'", "'\"'\"'")
    return quote_char + escaped + quote_char
116
# Preferred encoding of the user's locale. run_svn() falls back to it for
# encoding unicode arguments when called with a false 'encoding' value.
locale_encoding = locale.getpreferredencoding()
118
def run_svn(args, fail_if_stderr=False, ignore_retcode_err=False, encoding="utf-8"):
    """
    Run the "svn" program with the given arguments and return its stdout.

    'args' is the list of command-line arguments (sub-command first).
    'fail_if_stderr' treats any non-blank stderr output as a failure.
    'ignore_retcode_err' suppresses the failure on a non-zero exit status.
    'encoding' is used to encode unicode arguments; a false value means
    "use locale_encoding".

    Failures are reported through display_error().
    """
    def _to_str(arg):
        # Popen gets plain (byte) strings only.
        if isinstance(arg, unicode):
            return arg.encode(encoding or locale_encoding)
        if isinstance(arg, str):
            return arg
        return str(arg)

    svn_args = [_to_str(a) for a in args]
    cmd = find_program("svn")
    argv = [cmd] + svn_args
    # Printable form of the command, for echoing and for error messages.
    cmd_string = str(" ".join([shell_quote(a) for a in argv]))
    if runsvn_showcmd:
        # Status-only commands (or commands that aren't important to highlight) are
        # shown in dim-blue; everything else, i.e. commands that will take action on
        # the working-copy, defaults to bright-blue.
        status_cmds = ['status', 'st', 'log', 'info', 'list', 'propset', 'update', 'up', 'cleanup', 'revert']
        color = "34" if args[0] in status_cmds else "94"
        print("\x1b[34m" + "$" + "\x1b[" + color + "m" + " " + cmd_string + "\x1b[0m")
    if runsvn_timing:
        started = time.time()
    pipe = Popen(argv, executable=cmd, stdout=PIPE, stderr=PIPE)
    out, err = pipe.communicate()
    if runsvn_timing:
        finished = time.time()
        print("(" + str(round(finished - started, 4)) + " elapsed)")
    if out and runsvn_showout:
        print(out)
    bad_retcode = pipe.returncode != 0 and not ignore_retcode_err
    bad_stderr = fail_if_stderr and err.strip()
    if bad_retcode or bad_stderr:
        display_error("External program failed (return code %d): %s\n%s"
                      % (pipe.returncode, cmd_string, err))
    return out
155
def svn_date_to_timestamp(svn_date):
    """
    Convert an SVN date string, as read from the XML output, into the
    corresponding timestamp (seconds since the epoch, interpreting the
    date as UTC).
    """
    # Keep only what precedes the first '.', which drops the microseconds
    # and the timezone suffix (always UTC, hopefully).
    # XXX there are various ISO datetime parsing routines out there,
    # cf. http://seehuhn.de/comp/pdate
    whole_seconds = svn_date.partition('.')[0]
    return calendar.timegm(time.strptime(whole_seconds, "%Y-%m-%dT%H:%M:%S"))
167
def parse_svn_info_xml(xml_string):
    """
    Parse the XML output from an "svn info" command and extract
    useful information as a dict.

    The returned dict has the keys 'url', 'revision', 'repos_url',
    'repos_uuid', 'last_changed_rev' and 'kind'. It is empty when the
    XML contains no <entry> element.
    """
    d = {}
    tree = ET.fromstring(xml_string)
    entry = tree.find('.//entry')
    # Compare with None explicitly: the truth value of an Element reflects
    # whether it has children, not whether find() located it.
    if entry is not None:
        d['url'] = entry.find('url').text
        d['revision'] = int(entry.get('revision'))
        d['repos_url'] = tree.find('.//repository/root').text
        d['repos_uuid'] = tree.find('.//repository/uuid').text
        d['last_changed_rev'] = int(tree.find('.//commit').get('revision'))
        d['kind'] = entry.get('kind')
    return d
184
def parse_svn_log_xml(xml_string):
    """
    Parse the XML output from an "svn log" command and return a list of
    dicts, one per log changeset, with the keys 'revision', 'author',
    'date', 'message', 'revprops' and 'changed_paths'.
    """
    changesets = []
    root = ET.fromstring(xml_string)
    for logentry in root.findall('logentry'):
        info = {}
        info['revision'] = int(logentry.get('revision'))

        # Some revisions don't have authors, most notably
        # the first revision in a repository.
        author_node = logentry.find('author')
        if author_node is not None and author_node.text:
            info['author'] = author_node.text
        else:
            info['author'] = None

        info['date'] = svn_date_to_timestamp(logentry.find('date').text)

        # Some revisions may have empty commit message
        msg_node = logentry.find('msg')
        text = ""
        if msg_node is not None and msg_node.text is not None:
            text = msg_node.text.strip()
        # Replace DOS return '\r\n' and MacOS return '\r' with unix return '\n'
        for newline in ('\r\n', '\n\r', '\r'):
            text = text.replace(newline, '\n')
        info['message'] = text

        info['revprops'] = [{ 'name': prop.get('name'), 'value': prop.text }
                            for prop in logentry.findall('.//revprops/property')]

        touched = []
        for node in logentry.findall('.//paths/path'):
            from_rev = node.get('copyfrom-rev')
            if from_rev:
                from_rev = int(from_rev)
            touched.append({
                'path': node.text,
                'kind': node.get('kind'),
                'action': node.get('action'),
                'copyfrom_path': node.get('copyfrom-path'),
                'copyfrom_revision': from_rev,
            })
        # Need to sort paths (i.e. into hierarchical order), so that process_svn_log_entry()
        # can process actions in depth-first order.
        touched.sort(key=itemgetter('path'))
        info['changed_paths'] = touched
        changesets.append(info)
    return changesets
228
def parse_svn_status_xml(xml_string, base_dir=None):
    """
    Parse the XML output from an "svn status" command and return a list
    of dicts, one per status entry, with the keys 'path', 'wc_status'
    and 'type'.

    When 'base_dir' is given, every entry path must start with it; that
    prefix (and any path separator following it) is removed from 'path'.
    """
    results = []
    root = ET.fromstring(xml_string)
    for node in root.findall('.//entry'):
        rel_path = node.get('path')
        if base_dir is not None:
            assert rel_path.startswith(base_dir)
            rel_path = rel_path[len(base_dir):].lstrip('/\\')
        status_node = node.find('wc-status')
        status = {
            'props': status_node.get('props'),
            'item': status_node.get('item'),
            'copied': status_node.get('copied'),
            'revision': status_node.get('revision'),
        }
        item = status['item']
        if item in ('external', 'deleted', 'added'):
            # These item states map straight onto the entry type.
            entry_type = item
        elif status['revision'] is not None or item == 'normal':
            entry_type = 'normal'
        else:
            entry_type = 'unversioned'
        results.append({'path': rel_path, 'wc_status': status, 'type': entry_type})
    return results
262
def get_svn_info(svn_url_or_wc, rev_number=None):
    """
    Run "svn info" on the given URL or working copy and return the dict
    built by parse_svn_info_xml().
    When 'rev_number' is given, it is appended to the target as "@rev".
    """
    target = svn_url_or_wc
    if rev_number is not None:
        target = target + "@" + str(rev_number)
    return parse_svn_info_xml(run_svn(svn_info_args + [target], fail_if_stderr=True))
275
def svn_checkout(svn_url, checkout_dir, rev_number=None):
    """
    Check out 'svn_url' into 'checkout_dir', at revision 'rev_number'
    when one is given. Returns the output of the "svn" command.
    """
    rev_args = [] if rev_number is None else ['-r', rev_number]
    return run_svn(svn_checkout_args + rev_args + [svn_url, checkout_dir])
285
def run_svn_log(svn_url_or_wc, rev_start, rev_end, limit, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Fetch up to 'limit' SVN log entries between the given revisions.

    'rev_start' and 'rev_end' may be ints, numeric strings or 'HEAD'. When
    neither is 'HEAD', the range is passed with "-r" and, unless the target
    already contains an "@", the higher of the two revisions is appended
    as "@rev".
    'stop_on_copy', 'get_changed_paths' and 'get_revprops' add the
    "--stop-on-copy", "-v" and "--with-all-revprops" options.

    Returns the list built by parse_svn_log_xml().
    """
    args = []
    if stop_on_copy:
        args += ['--stop-on-copy']
    if get_changed_paths:
        args += ['-v']
    if get_revprops:
        args += ['--with-all-revprops']
    url = str(svn_url_or_wc)
    if rev_start != 'HEAD' and rev_end != 'HEAD':
        args += ['-r', '%s:%s' % (rev_start, rev_end)]
        if not "@" in svn_url_or_wc:
            # Callers mix ints and numeric strings; compare them as numbers
            # so that e.g. 10 and "5" yield 10 rather than whatever the
            # interpreter's cross-type ordering happens to give.
            try:
                peg_rev = max(int(rev_start), int(rev_end))
            except (TypeError, ValueError):
                # Not plain revision numbers: keep the original comparison.
                peg_rev = max(rev_start, rev_end)
            url += "@" + str(peg_rev)
    args += ['--limit', str(limit), url]
    xml_string = run_svn(svn_log_args + args)
    return parse_svn_log_xml(xml_string)
305
def get_svn_status(svn_wc, flags=None):
    """
    Run "svn status" on the given working-copy path and return the list
    built by parse_svn_status_xml(), with paths relative to 'svn_wc'.
    'flags', when given, is passed as one extra command-line argument.
    """
    # Ensure proper stripping by canonicalizing the path
    wc_path = os.path.abspath(svn_wc)
    extra = [flags] if flags else []
    status_xml = run_svn(svn_status_args + extra + [wc_path])
    return parse_svn_status_xml(status_xml, wc_path)
318
def get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Return the first SVN log entry in the requested revision range.
    Reports an error through display_error() when the range has none.
    """
    found = run_svn_log(svn_url, rev_start, rev_end, 1, stop_on_copy, get_changed_paths, get_revprops)
    if found:
        return found[0]
    # display_error() does not return: it either raises or exits.
    display_error("No SVN log for %s between revisions %s and %s" %
                  (svn_url, rev_start, rev_end))
329
def get_first_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the first log entry after/at 'rev_start' (and no later than
    'rev_end') in an SVN branch. The log is fetched with "--stop-on-copy",
    so the search does not go back past the creation of the branch.

    'get_changed_paths' controls whether the returned entry carries the
    list of changed paths ("svn log -v").

    NOTE: to know whether the branch creation corresponds to an SVN import or
    a copy from another branch, inspect elements of the 'changed_paths' entry
    in the returned dictionary.
    """
    # Honor the caller's get_changed_paths instead of always forcing it on.
    return get_one_svn_log_entry(svn_url, rev_start, rev_end, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
341
def get_last_svn_log_entry(svn_url, rev_start, rev_end, get_changed_paths=True):
    """
    Get the last log entry before/at 'rev_end' (and no earlier than
    'rev_start') in an SVN branch. The log is fetched with "--stop-on-copy".

    'get_changed_paths' controls whether the returned entry carries the
    list of changed paths ("svn log -v").
    """
    # The range is passed reversed (rev_end first) so that the first entry
    # returned is the most recent one. Honor the caller's get_changed_paths
    # instead of always forcing it on.
    return get_one_svn_log_entry(svn_url, rev_end, rev_start, stop_on_copy=True,
                                 get_changed_paths=get_changed_paths)
349
350
# Tuning knobs for iter_svn_log_entries(): a chunk fetched in less than
# log_duration_threshold seconds doubles the next chunk length, one that took
# more than twice as long halves it, never going below log_min_chunk_length.
log_duration_threshold = 10.0
log_min_chunk_length = 10
353
def iter_svn_log_entries(svn_url, first_rev, last_rev, stop_on_copy=False, get_changed_paths=True, get_revprops=False):
    """
    Iterate over SVN log entries between first_rev and last_rev.

    This function features chunked log fetching so that it isn't too nasty
    to the SVN server if many entries are requested.

    'last_rev' may be a revision number or the string "HEAD". The remaining
    arguments are passed through to run_svn_log(). Yields the log-entry
    dicts in the order run_svn_log() returns them.

    NOTE(review): with last_rev == "HEAD" the loop has no exit condition of
    its own; presumably callers resolve HEAD to a number first -- confirm.
    """
    cur_rev = first_rev
    chunk_length = log_min_chunk_length
    chunk_interval_factor = 1.0
    while last_rev == "HEAD" or cur_rev <= last_rev:
        start_t = time.time()
        chunk_end = cur_rev + int(chunk_length * chunk_interval_factor)
        # "HEAD" cannot be compared with a revision number, so it never caps
        # the chunk; a numeric last_rev does.
        if last_rev == "HEAD":
            stop_rev = chunk_end
        else:
            stop_rev = min(last_rev, chunk_end)
        entries = run_svn_log(svn_url, cur_rev, stop_rev, chunk_length, stop_on_copy , get_changed_paths, get_revprops)
        duration = time.time() - start_t
        if not entries:
            if stop_rev == last_rev:
                break
            # Nothing in this window: move past it and widen the next one.
            cur_rev = stop_rev + 1
            chunk_interval_factor *= 2.0
            continue
        for e in entries:
            yield e
            cur_rev = e['revision'] + 1
        # Adapt chunk length based on measured request duration
        if duration < log_duration_threshold:
            chunk_length = int(chunk_length * 2.0)
        elif duration > log_duration_threshold * 2:
            chunk_length = max(log_min_chunk_length, int(chunk_length / 2.0))
383
def commit_from_svn_log_entry(entry, files=None, keep_author=False, revprops=None):
    """
    Given an SVN log entry and an optional sequence of files, do an svn commit.

    'entry' is a log-entry dict as built by parse_svn_log_xml().
    'files' optionally restricts the commit to the given paths.
    'keep_author' commits with "--username <author>" instead of appending
    an "Author:" line to the commit message.
    'revprops' is an optional list of {'name', 'value'} dicts, each passed
    to the commit with "--with-revprop".

    The original commit date is always appended to the message as "Date:".
    A log entry without an author (entry['author'] is None) is committed
    without "--username" / without the "Author:" line.
    """
    # TODO: Run optional external shell hook here, for doing pre-commit filtering
    # This will use the local timezone for displaying commit times
    timestamp = int(entry['date'])
    svn_date = str(datetime.fromtimestamp(timestamp))
    # Uncomment this one one if you prefer UTC commit times
    #svn_date = "%d 0" % timestamp
    author = entry['author']
    message = entry['message'] + "\nDate: " + svn_date
    if author and not keep_author:
        message += "\nAuthor: " + author
    options = ["ci", "--force-log", "-m", message]
    if author and keep_author:
        options += ["--username", author]
    if revprops:
        for r in revprops:
            # A property without a value is passed as "name=" rather than
            # as the literal text "None".
            value = r['value']
            if value is None:
                value = ""
            options += ["--with-revprop", r['name']+"="+str(value)]
    if files:
        options += list(files)
    print("(Committing source rev #"+str(entry['revision'])+"...)")
    run_svn(options)
405
def in_svn(p, in_repo=False):
    """
    Check if a given file/folder is being tracked by Subversion.
    Prior to SVN 1.6, we could "cheat" and look for the existence of ".svn" directories.
    With SVN 1.7 and beyond, WC-NG means only a single top-level ".svn" at the root of the working-copy.
    So "svn status" is used to check the status of the file/folder.

    With 'in_repo' set, a path that is only locally added (or has no
    revision yet) does not count as tracked.
    """
    statuses = get_svn_status(p)
    if not statuses:
        return False
    first = statuses[0]
    if in_repo:
        # Caller requires this path to be in the SVN repo, so don't report
        # True for locally-added paths.
        if first['type'] == 'added' or first['wc_status']['revision'] is None:
            return False
    return first['type'] in ('normal', 'added')
421
def find_svn_ancestors(source_repos_url, source_url, path_offset, source_rev, \
                       copyfrom_path, copyfrom_rev, prefix = ""):
    """
    Given a final svn-add'd path (source_base+"/"+path_offset) and the origin copy-from
    path (copyfrom_path), walk the SVN history backwards to inspect the ancestry of
    that path. Build a collection of copyfrom_path+revision pairs for each of the
    branch-copies since the initial branch-creation. If we find a copyfrom_path which
    source_url is a substring match of (e.g. we crawled back to the initial branch-
    copy from trunk), then return the collection of ancestor paths. Otherwise,
    copyfrom_path has no ancestry compared to source_url.

    This is useful when comparing "trunk" vs. "branch" paths, to handle cases where a
    file/folder was renamed in a branch and then that branch was merged back to trunk.

    'source_repos_url' is the full URL to the root of the source repository,
      e.g. 'file:///path/to/repo'
    'source_url' is the full URL to the source path in the source repository.
    'path_offset' is the offset from source_base to the file to check ancestry for,
      e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'copyfrom_path' is copy-from path, e.g. '/branches/bug123/projectA/file1.txt'
    'copyfrom_rev' is revision this copy-from path was copied at.
    'prefix' is prepended to every debug line printed.

    Returns a list of {'path', 'revision'} dicts, one per copy-from hop that was
    followed (the given copy-from first). The list is empty when the walk ran into
    a replace, a delete or a plain (non-copy) add. When the walk stops because no
    matching changed-path was found, the hops collected so far are still returned,
    so callers have to check the last entry against source_base themselves.
    """
    done = False
    # Repository-relative path of source_url, e.g. '/trunk'
    source_base = source_url[len(source_repos_url):]
    working_path = copyfrom_path
    working_rev = copyfrom_rev
    # Seed the chain with the copy we were asked about.
    ancestors_temp = [{'path': source_base+"/"+path_offset, 'revision': source_rev, 'copyfrom_path': copyfrom_path, 'copyfrom_rev': copyfrom_rev}]
    while not done:
        # Get the first "svn log" entry for this path (relative to @rev)
        #working_path = working_base + "/" + working_offset
        if debug:
            print prefix+"\x1b[33m" + ">> find_svn_ancestors: " + source_repos_url + working_path+"@"+str(working_rev) + "\x1b[0m"
        log_entry = get_first_svn_log_entry(source_repos_url + working_path+"@"+str(working_rev), 1, str(working_rev), True)
        if not log_entry:
            done = True
            break
        # Search for any actions on our target path (or parent paths).
        # NOTE(review): this is a substring test, not a path-prefix test -- confirm intended.
        changed_paths_temp = []
        for d in log_entry['changed_paths']:
            path = d['path']
            if path in working_path:
                changed_paths_temp.append({'path': path, 'data': d})
        if not changed_paths_temp:
            # If no matches, then we've hit the end of the chain and this path has no ancestry back to source_url.
            done = True
            continue
        # Reverse-sort any matches, so that we start with the most-granular (deepest in the tree) path.
        changed_paths = sorted(changed_paths_temp, key=itemgetter('path'), reverse=True)
        # Find the action for our working_path in this revision
        for v in changed_paths:
            d = v['data']
            path = d['path']
            # Check action-type for this file
            action = d['action']
            if action not in 'MARD':
                display_error("In SVN rev. %d: action '%s' not supported. \
                               Please report a bug!" % (log_entry['revision'], action))
            if debug:
                debug_desc = "> " + action + " " + path
                if d['copyfrom_path']:
                    debug_desc += " (from " + d['copyfrom_path']+"@"+str(d['copyfrom_revision']) + ")"
                print prefix+"\x1b[33m" + debug_desc + "\x1b[0m"

            # An 'M' action matches none of the branches below, so the loop
            # simply moves on to the next (shallower) changed path.
            if action == 'R':
                # If file/folder was replaced, it has no ancestor
                ancestors_temp = []
                done = True
                break
            if action == 'D':
                # If file/folder was deleted, it has no ancestor
                ancestors_temp = []
                done = True
                break
            if action == 'A':
                # If file/folder was added but not a copy, it has no ancestor
                if not d['copyfrom_path']:
                    ancestors_temp = []
                    done = True
                    break
                # Else, file/folder was added and is a copy, so add an entry to our ancestors list
                # and keep checking for ancestors
                if debug:
                    print prefix+"\x1b[33m" + ">> find_svn_ancestors: Found copy-from: " + \
                          path + " --> " + d['copyfrom_path']+"@"+str(d['copyfrom_revision']) + "\x1b[0m"
                ancestors_temp.append({'path': path, 'revision': log_entry['revision'],
                                       'copyfrom_path': d['copyfrom_path'], 'copyfrom_rev': d['copyfrom_revision']})
                # Continue the walk from the copy source.
                working_path = working_path.replace(d['path'], d['copyfrom_path'])
                working_rev = d['copyfrom_revision']
                # If we found a copy-from case which matches our source_base, we're done
                if source_base in working_path:
                    done = True
                    break
                # Else, follow the copy and keep on searching
                break
    ancestors = []
    if ancestors_temp:
        # Replay the collected copies on the original path, so that each hop
        # records where that path lived before the copy.
        working_path = source_base+"/"+path_offset
        for idx in range(0, len(ancestors_temp)):
            d = ancestors_temp[idx]
            working_path = working_path.replace(d['path'], d['copyfrom_path'])
            working_rev = d['copyfrom_rev']
            ancestors.append({'path': working_path, 'revision': working_rev})
        if debug:
            # Width of the longest "path@rev", used to align the listing below.
            max_len = 0
            for idx in range(len(ancestors)):
                d = ancestors[idx]
                max_len = max(max_len, len(d['path']+"@"+str(d['revision'])))
            print prefix+"\x1b[93m" + ">> find_svn_ancestors: Found parent ancestors: " + "\x1b[0m"
            for idx in range(len(ancestors)-1):
                d = ancestors[idx]
                d_next = ancestors[idx+1]
                print prefix+"\x1b[33m" + " ["+str(idx)+"] " + str(d['path']+"@"+str(d['revision'])).ljust(max_len) + \
                      " <-- " + str(d_next['path']+"@"+str(d_next['revision'])).ljust(max_len) + "\x1b[0m"
    return ancestors
537
def get_rev_map(rev_map, src_rev, prefix):
    """
    Find the equivalent rev # in the target repo for the given rev # from the source repo.

    'rev_map' is the dictionary mapping source-repo rev #'s to target-repo rev #'s.
    'src_rev' is the (integer) source-repo rev # to look up.
    'prefix' is prepended to every line printed.

    Returns the rev_map value of the highest source rev # that is less
    than or equal to src_rev. Reports an error through display_error()
    when there is no such entry.
    """
    print(prefix + "\x1b[32m" + ">> get_rev_map("+str(src_rev)+")" + "\x1b[0m")
    # Find the highest entry less-than-or-equal-to src_rev
    for rev in range(src_rev, 0, -1):
        if debug:
            print(prefix + "\x1b[32m" + ">> get_rev_map: rev="+str(rev)+" in_rev_map="+str(rev in rev_map) + "\x1b[0m")
        if rev in rev_map:
            return rev_map[rev]
    # Else, we fell off the bottom of the rev_map. Ruh-roh...
    # src_rev is a number, so it has to be converted before being appended.
    display_error("Internal Error: get_rev_map: Unable to find match rev_map entry for src_rev=" + str(src_rev))
551
def get_svn_dirlist(svn_path, svn_rev = ""):
    """
    Get a list of all the child contents (recursive) of the given folder path,
    as printed by "svn list --recursive". When 'svn_rev' is given, the
    listing is taken at that revision.
    """
    target = svn_path
    rev_args = []
    if svn_rev:
        rev_args = ["-r", str(svn_rev)]
        target += "@"+str(svn_rev)
    # A failing exit status is ignored here (third argument).
    listing = run_svn(["list", "--recursive"] + rev_args + [target], False, True)
    if len(listing) > 1:
        return listing.strip("\n").split("\n")
    return []
565
def replay_svn_copyfrom(source_repos_url, source_url, path_offset, target_url, source_rev, \
                        copyfrom_path, copyfrom_rev, rev_map, is_dir = False, prefix = ""):
    """
    Given a source path and its copy-from origin info, replay the necessary
    "svn copy" and "svn rm" commands to correctly track renames across copy-from's.

    For example, consider a sequence of events like this:
    1. svn copy /trunk /branches/fix1
    2. (Make some changes on /branches/fix1)
    3. svn mv /branches/fix1/Proj1 /branches/fix1/Proj2   # Rename folder
    4. svn mv /branches/fix1/Proj2/file1.txt /branches/fix1/Proj2/file2.txt   # Rename file inside renamed folder
    5. svn co /trunk && svn merge /branches/fix1
    After the merge and commit, "svn log -v" will show a delete of /trunk/Proj1
    and an add of /trunk/Proj2 copy-from /branches/fix1/Proj2. If we were just
    to do a straight "svn export+add" based on the /branches/fix1/Proj2 folder,
    we'd lose the logical history that Proj2/file2.txt is really a descendant
    of Proj1/file1.txt.

    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'path_offset' is the offset from source_base to the file to check ancestry for,
      e.g. 'projectA/file1.txt'. path = source_repos_url + source_base + path_offset.
    'target_url' is the full URL to the target path in the target repository.
    'source_rev' is the revision ("svn log") that we're processing from the source repo.
    'copyfrom_path' is copy-from path, e.g. '/branches/bug123/projectA/file1.txt'
    'copyfrom_rev' is revision this copy-from path was copied at.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s.
    'is_dir' is whether path_offset is a directory (rather than a file).
    'prefix' is prepended to every debug line printed.

    Nothing is returned; the function runs "svn" commands against
    'path_offset', which is used as a path relative to the current directory.
    """
    source_base = source_url[len(source_repos_url):]
    # Remember the copy-from as given by the caller: copyfrom_path/rev may be
    # replaced by an ancestor below, but the recursion on child paths is
    # based on the original values.
    srcfrom_path = copyfrom_path
    srcfrom_rev = copyfrom_rev
    if debug:
        print prefix + "\x1b[32m" + ">> replay_svn_copyfrom: Check copy-from: " + source_base+" "+path_offset + " --> " + copyfrom_path+"@"+str(copyfrom_rev) + "\x1b[0m"
    if source_base in copyfrom_path:
        # The copy-from path is inside source_base, no need to check ancestry.
        if debug:
            print prefix + "\x1b[32;1m" + ">> replay_svn_copyfrom: Check copy-from: Found copy (in source_base): " + copyfrom_path+"@"+str(copyfrom_rev) + "\x1b[0m"
    else:
        # Check if the copy-from path has ancestors which chain back to the current source_base
        ancestors = find_svn_ancestors(source_repos_url, source_url, path_offset, source_rev,
                                       copyfrom_path, copyfrom_rev, prefix+" ")
        if ancestors:
            # The copy-from path has ancestry back to source_url.
            # ancestors[n] is the original (pre-branch-copy) trunk path.
            # ancestors[n-1] is the first commit on the new branch.
            copyfrom_path = ancestors[len(ancestors)-1]['path']
            copyfrom_rev = ancestors[len(ancestors)-1]['revision']
            if debug:
                print prefix + "\x1b[32;1m" + ">> replay_svn_copyfrom: Check copy-from: Found parent: " + copyfrom_path+"@"+str(copyfrom_rev) + "\x1b[0m"
    if not source_base in copyfrom_path:
        # If this copy-from path has no ancestry back to source_url, then can't do a "svn copy".
        # Create (parent) directory if needed
        p_path = path_offset if is_dir else os.path.dirname(path_offset).strip() or '.'
        if not os.path.exists(p_path):
            os.makedirs(p_path)
        # Export the entire added tree.
        run_svn(["export", "--force", "-r", str(copyfrom_rev),
                 source_repos_url + copyfrom_path+"@"+str(copyfrom_rev), path_offset])
        if not in_svn(path_offset):
            run_svn(["add", "--parents", path_offset])
        # TODO: Need to copy SVN properties from source repos
    else:
        # Offset of the copy-from path below source_base, i.e. below target_url.
        copyfrom_offset = copyfrom_path[len(source_base):].strip('/')
        if debug:
            print prefix + "\x1b[32m" + ">> replay_svn_copyfrom: svn_copy: Copy-from: " + copyfrom_path+"@"+str(copyfrom_rev) + "\x1b[0m"
        # Copy this path from the equivalent path+rev in the target repo, to create the
        # equivalent history.
        tgt_rev = get_rev_map(rev_map, copyfrom_rev, prefix+" ")
        if debug:
            print prefix + "\x1b[32m" + ">> replay_svn_copyfrom: get_rev_map: " + str(copyfrom_rev) + " (source) -> " + str(tgt_rev) + " (target)" + "\x1b[0m"
        run_svn(["copy", "-r", tgt_rev, target_url+"/"+copyfrom_offset+"@"+str(tgt_rev), path_offset])
        # Update the content in this fresh copy to match the final target revision.
        if is_dir:
            # Compare what the copy produced locally with what the source
            # repo has at source_rev.
            paths_local = get_svn_dirlist(path_offset)
            paths_remote = get_svn_dirlist(source_url+"/"+path_offset, source_rev)
            if debug:
                print prefix + "\x1b[32m" + "paths_local: " + str(paths_local) + "\x1b[0m"
                print prefix + "\x1b[32m" + "paths_remote: " + str(paths_remote) + "\x1b[0m"
            # Update files/folders which exist in remote but not local
            for path in paths_remote:
                if not path in paths_local:
                    # A trailing "/" in the listing marks a directory.
                    path_is_dir = True if path[-1] == "/" else False
                    replay_svn_copyfrom(source_repos_url, source_url, path_offset+"/"+path,
                                        target_url, source_rev,
                                        srcfrom_path+"/"+path, srcfrom_rev,
                                        rev_map, path_is_dir, prefix+" ")
            # Remove files/folders which exist in local but not remote
            for path in paths_local:
                if not path in paths_remote:
                    if svnlog_verbose:
                        print " D " + source_base+"/"+path_offset+"/"+path
                    run_svn(["remove", "--force", path_offset+"/"+path])
                    # TODO: Does this handle deleted folders too? Wouldn't want to have a case
                    # where we only delete all files from folder but leave orphaned folder around.
        else:
            # Single file: overwrite the copied file with its content at source_rev.
            run_svn(["export", "--force", "-r", str(source_rev),
                     source_repos_url+source_base+"/"+path_offset+"@"+str(source_rev), path_offset])
665
def process_svn_log_entry(log_entry, source_repos_url, source_url, target_url, \
                          rev_map, removed_paths = None, commit_paths = None, prefix = ""):
    """
    Process SVN changes from the given log entry.
    Returns array of all the paths in the working-copy that were changed,
    i.e. the paths which need to be "svn commit".

    'log_entry' is the array structure built by parse_svn_log_xml().
    'source_repos_url' is the full URL to the root of the source repository.
    'source_url' is the full URL to the source path in the source repository.
    'target_url' is the full URL to the target path in the target repository.
    'rev_map' is the running mapping-table dictionary for source-repo rev #'s
      to the equivalent target-repo rev #'s.
    'removed_paths' is the working list of deferred deletions. It is updated
      in place; a new list is used when omitted.
    'commit_paths' is the working list of specific paths which changes to pass
      to the final "svn commit". It is updated in place and returned; a new
      list is used when omitted.
    'prefix' is prepended to every line printed.
    """
    # Never share the working lists between calls: both are mutated below.
    if removed_paths is None:
        removed_paths = []
    if commit_paths is None:
        commit_paths = []
    # Get the relative offset of source_url based on source_repos_url
    # e.g. '/branches/bug123'
    source_base = source_url[len(source_repos_url):]
    source_rev = log_entry['revision']
    if debug:
        print(prefix + "\x1b[32m" + ">> process_svn_log_entry: " + source_url+"@"+str(source_rev) + "\x1b[0m")
    for d in log_entry['changed_paths']:
        # Get the full path for this changed_path
        # e.g. '/branches/bug123/projectA/file1.txt'
        path = d['path']
        if not path.startswith(source_base + "/"):
            # Ignore changed files that are not part of this subdir
            # (source_base itself is still processed)
            if path != source_base:
                if debug:
                    print(prefix + "\x1b[90m" + ">> process_svn_log_entry: Unrelated path: " + path + " (" + source_base + ")" + "\x1b[0m")
                continue
        # Calculate the offset (based on source_base) for this changed_path
        # e.g. 'projectA/file1.txt'
        # (path = source_base + "/" + path_offset)
        path_offset = path[len(source_base):].strip("/")
        # Get the action for this path
        action = d['action']
        if action not in 'MARD':
            display_error("In SVN rev. %d: action '%s' not supported. \
                           Please report a bug!" % (source_rev, action))

        # Try to be efficient and keep track of an explicit list of paths in the
        # working copy that changed. If we commit from the root of the working copy,
        # then SVN needs to crawl the entire working copy looking for pending changes.
        # So only the first 100 paths are recorded here.
        if len(commit_paths) < 100:
            commit_paths.append(path_offset)

        # Special-handling for replace's
        if action == 'R':
            if svnlog_verbose:
                msg = " " + action + " " + d['path']
                if d['copyfrom_path']:
                    msg += " (from " + d['copyfrom_path']+"@"+str(d['copyfrom_revision']) + ")"
                print(prefix + msg)
            # If file was "replaced" (deleted then re-added, all in same revision),
            # then we need to run the "svn rm" first, then change action='A'. This
            # lets the normal code below handle re-"svn add"'ing the files. This
            # should replicate the "replace".
            run_svn(["remove", "--force", path_offset])
            action = 'A'

        # Handle all the various action-types
        # (Handle "add" first, for "svn copy/move" support)
        if action == 'A':
            if svnlog_verbose:
                msg = " " + action + " " + d['path']
                if d['copyfrom_path']:
                    msg += " (from " + d['copyfrom_path']+"@"+str(d['copyfrom_revision']) + ")"
                print(prefix + msg)
            # If we have any queued deletions for this same path, remove those if we're re-adding this path.
            if path_offset in removed_paths:
                removed_paths.remove(path_offset)
            # Determine where to export from.
            copyfrom_path = path
            copyfrom_rev = source_rev
            path_is_dir = d['kind'] == 'dir'
            # Handle cases where this "add" was a copy from another URL in the source repos
            if d['copyfrom_revision']:
                copyfrom_path = d['copyfrom_path']
                copyfrom_rev = d['copyfrom_revision']
                replay_svn_copyfrom(source_repos_url, source_url, path_offset, target_url, source_rev,
                                    copyfrom_path, copyfrom_rev, rev_map, path_is_dir, prefix+" ")
            # Else just "svn export" the files from the source repo and "svn add" them.
            else:
                # Create (parent) directory if needed
                p_path = path_offset if path_is_dir else os.path.dirname(path_offset).strip() or '.'
                if not os.path.exists(p_path):
                    os.makedirs(p_path)
                # Export the entire added tree.
                run_svn(["export", "--force", "-r", str(copyfrom_rev),
                         source_repos_url + copyfrom_path+"@"+str(copyfrom_rev), path_offset])
                if not in_svn(path_offset):
                    run_svn(["add", "--parents", path_offset])
                # TODO: Need to copy SVN properties from source repos

        elif action == 'D':
            # Queue "svn remove" commands, to allow the action == 'A' handling the opportunity
            # to do smart "svn copy" handling on copy/move/renames.
            if path_offset not in removed_paths:
                removed_paths.append(path_offset)

        elif action == 'M':
            if svnlog_verbose:
                print(prefix + " " + action + " " + d['path'])
            # TODO: Is "svn merge -c" correct here? Should this just be an "svn export" plus
            # proplist updating?
            run_svn(["merge", "-c", str(source_rev), "--non-recursive",
                     "--non-interactive", "--accept=theirs-full",
                     source_url+"/"+path_offset+"@"+str(source_rev), path_offset])

        else:
            display_error("Internal Error: process_svn_log_entry: Unhandled 'action' value: '" + action + "'")

    return commit_paths
785
def disp_svn_log_summary(log_entry):
    """
    Print a short "svn log"-style banner for a single source log entry.

    Reads the 'revision', 'author', 'date' and 'message' keys of log_entry.
    'date' is converted with int() and treated as a Unix timestamp, which is
    rendered in local time.
    """
    print("\n(Starting source rev #" + str(log_entry['revision']) + ":)")
    # Header line: "r<rev> | <author> | <YYYY-MM-DD HH:MM:SS>"
    commit_time = datetime.fromtimestamp(int(log_entry['date']))
    header_fields = ["r" + str(log_entry['revision']),
                     log_entry['author'],
                     str(commit_time.isoformat(' '))]
    print(" | ".join(header_fields))
    print(log_entry['message'])
    print("------------------------------------------------------------------------")
793
def pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url, target_url, rev_map, keep_author=False):
    """
    Replay one source log entry into the current target working copy and
    commit it.

    The changed paths are applied via process_svn_log_entry(), any queued
    deletions are then run, and the result is committed together with
    source-tracking revprops (source_uuid, source_url, source_rev).
    This function does not return a value.
    """
    disp_svn_log_summary(log_entry)
    source_rev = log_entry['revision']

    # process_svn_log_entry() fills these two lists in place.
    removed_paths = []
    commit_paths = []
    process_svn_log_entry(log_entry, source_repos_url, source_url, target_url,
                          rev_map, removed_paths, commit_paths)

    # Deletions were deferred so that the "add" handling could see them first;
    # run them now.
    if removed_paths:
        # Path of source_url relative to the repository root.
        path_base = source_url[len(source_repos_url):]
        for doomed in removed_paths:
            if svnlog_verbose:
                print(" D " + path_base + "/" + doomed)
            run_svn(["remove", "--force", doomed])

    # Too many individual paths: fall back to committing from the root of
    # the working copy by passing an empty list.
    if len(commit_paths) >= 100:
        commit_paths = []

    # Source-tracking revprops attached to the target commit.
    tracking = (('source_uuid', source_repos_uuid),
                ('source_url', source_url),
                ('source_rev', source_rev))
    revprops = [{'name': prop_name, 'value': prop_value}
                for prop_name, prop_value in tracking]
    commit_from_svn_log_entry(log_entry, commit_paths, keep_author=keep_author, revprops=revprops)
    print("(Finished source rev #" + str(source_rev) + ")")
827
def main():
    """
    Command-line entry point: replay the history of source_url into target_url.

    Parses the options, looks up the source repository info, and then either:
      * (default) checks out a fresh "_dup_wc" working copy of target_url and
        performs an initial import of source_url at the starting revision, or
      * (-c, when "_dup_wc" already exists) re-uses that working copy and
        resumes after the revision given with -r.
    Every later source revision up to the source's current revision is then
    replayed with pull_svn_rev(). On Ctrl+C or any other error the working
    copy is cleaned up and reverted; a final "svn up" always runs.
    """
    global debug, runsvn_showcmd, svnlog_verbose

    usage = "Usage: %prog [-a] [-c] [-r SVN rev] source_url target_url"
    parser = OptionParser(usage)
    parser.add_option("-r", "--revision", type="int", dest="svn_rev", metavar="REV",
                      help="initial SVN revision to checkout from")
    parser.add_option("-a", "--keep-author", action="store_true", dest="keep_author",
                      help="maintain original Author info from source repo")
    parser.add_option("-c", "--continue", action="store_true", dest="cont_from_break",
                      help="continue from previous break")
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                      help="show 'svn status'-style messages for each action replayed [default]")
    parser.add_option("-q", "--quiet", action="store_false", dest="verbose",
                      help="show only minimal status/progress messages")
    parser.set_defaults(verbose=True)
    group = OptionGroup(parser, "Debug Options")
    group.add_option("--debug", action="store_true", dest="debug_all",
                     help="enable all debugging options")
    group.add_option("--debug-showcmds", action="store_true", dest="debug_showcmds",
                     help="display each SVN command being executed")
    group.add_option("--debug-debugmsgs", action="store_true", dest="debug_debugmsgs",
                     help="display debug messages")
    parser.add_option_group(group)
    (options, args) = parser.parse_args()
    if len(args) != 2:
        display_error("incorrect number of arguments\n\nTry: svn2svn.py --help",
                      False)

    source_url = args.pop(0).rstrip("/")
    target_url = args.pop(0).rstrip("/")
    keep_author = bool(options.keep_author)

    # Find the greatest_rev in the source repo
    svn_info = get_svn_info(source_url)
    greatest_rev = svn_info['revision']
    # Get the base URL for the source repos, e.g. 'svn://svn.example.com/svn/repo'
    source_repos_url = svn_info['repos_url']
    # Get the UUID for the source repos
    source_repos_uuid = svn_info['repos_uuid']

    dup_wc = "_dup_wc"
    rev_map = {}

    # Translate command-line switches into the module-level flags.
    if options.debug_debugmsgs:
        debug = True
    if options.debug_showcmds:
        runsvn_showcmd = True
    if options.debug_all:
        debug = True
        runsvn_showcmd = True
    if options.verbose:
        svnlog_verbose = True

    # if old working copy does not exist, disable continue mode
    # TODO: Better continue support. Maybe include source repo's rev # in target commit info?
    if not os.path.exists(dup_wc):
        options.cont_from_break = False

    if not options.cont_from_break:
        # Warn if Target SVN URL existed
        cmd = find_program("svn")
        pipe = Popen([cmd] + ["list"] + [target_url], executable=cmd,
                     stdout=PIPE, stderr=PIPE)
        out = pipe.communicate()[0]
        if pipe.returncode == 0:
            print("Target SVN URL: %s existed!" % target_url)
            if out:
                print(out)
            print("Press 'Enter' to Continue, 'Ctrl + C' to Cancel...")
            print("(Timeout in 5 seconds)")
            # Wait for input on stdin, but give up (and continue) after 5 seconds.
            select.select([sys.stdin], [], [], 5)

        # Get log entry for the SVN revision we will check out
        if options.svn_rev:
            # If specify a rev, get log entry just before or at rev
            svn_start_log = get_last_svn_log_entry(source_url, 1, options.svn_rev, False)
        else:
            # Otherwise, get log entry of branch creation
            # TODO: This call is *very* expensive on a repo with lots of revisions.
            #       Even though the call is passing --limit 1, it seems like that limit-filter
            #       is happening after SVN has fetched the full log history.
            svn_start_log = get_first_svn_log_entry(source_url, 1, greatest_rev, False)

        # This is the revision we will start from for source_url
        source_rev = svn_start_log['revision']

        # Check out a working copy of target_url
        dup_wc = os.path.abspath(dup_wc)
        if os.path.exists(dup_wc):
            shutil.rmtree(dup_wc)
        svn_checkout(target_url, dup_wc)
        os.chdir(dup_wc)

        # For the initial commit to the target URL, export all the contents from
        # the source URL at the start-revision.
        paths = run_svn(["list", "-r", str(source_rev), source_url+"@"+str(source_rev)])
        if len(paths) > 1:
            disp_svn_log_summary(get_one_svn_log_entry(source_url, source_rev, source_rev))
            print("(Initial import)")
            for path in paths.strip("\n").split("\n"):
                # For each top-level file/folder...
                if not path:
                    # Skip null lines. (Must be "continue": a "break" here would
                    # silently drop every path listed after a blank line.)
                    continue
                # Directories have a trailing slash in the "svn list" output
                if path.endswith("/"):
                    path = path.rstrip('/')
                    if not os.path.exists(path):
                        os.makedirs(path)
                run_svn(["export", "--force", "-r", str(source_rev),
                         source_url+"/"+path+"@"+str(source_rev), path])
                run_svn(["add", path])
            revprops = [{'name':'source_uuid', 'value':source_repos_uuid},
                        {'name':'source_url', 'value':source_url},
                        {'name':'source_rev', 'value':source_rev}]
            commit_from_svn_log_entry(svn_start_log, [], keep_author=keep_author, revprops=revprops)
            print("(Finished source rev #"+str(source_rev)+")")
    else:
        dup_wc = os.path.abspath(dup_wc)
        os.chdir(dup_wc)
        # TODO: Need better resume support. For the time being, expect caller explicitly passes in resume revision.
        source_rev = options.svn_rev
        # Check for a missing -r explicitly instead of relying on how None
        # compares against an int.
        if source_rev is None or source_rev < 1:
            display_error("Invalid arguments\n\nNeed to pass result rev # (-r) when using continue-mode (-c)", False)

    # Load SVN log starting from source_rev + 1
    it_log_entries = iter_svn_log_entries(source_url, source_rev + 1, greatest_rev)

    try:
        for log_entry in it_log_entries:
            # Replay this revision from source_url into target_url
            pull_svn_rev(log_entry, source_repos_url, source_repos_uuid, source_url,
                         target_url, rev_map, keep_author)
            # Update our target working-copy, to ensure everything says it's at the new HEAD revision
            run_svn(["up", dup_wc])
            # Update rev_map, mapping table of source-repo rev # -> target-repo rev #
            dup_info = get_svn_info(target_url)
            dup_rev = dup_info['revision']
            source_rev = log_entry['revision']
            rev_map[source_rev] = dup_rev

    except KeyboardInterrupt:
        print("\nStopped by user.")
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
    except:
        # Deliberately broad: whatever went wrong, report it and leave the
        # working copy in a clean, reverted state.
        print("\nCommand failed with following error:\n")
        traceback.print_exc()
        run_svn(["cleanup"])
        run_svn(["revert", "--recursive", "."])
    finally:
        run_svn(["up"])
        print("\nFinished!")
985
986
# Run the replay only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
989
990 # vim:sts=4:sw=4: