File: //usr/bin/X11/X11/X11/X11/svn-hot-backup
#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
#  hot-backup.py: perform a "hot" backup of a Subversion repository
#                 and clean any old Berkeley DB logfiles after the
#                 backup completes, if the repository backend is
#                 Berkeley DB.
#
#  Subversion is a tool for revision control. 
#  See http://subversion.apache.org for more information.
#    
# ====================================================================
#    Licensed to the Apache Software Foundation (ASF) under one
#    or more contributor license agreements.  See the NOTICE file
#    distributed with this work for additional information
#    regarding copyright ownership.  The ASF licenses this file
#    to you under the Apache License, Version 2.0 (the
#    "License"); you may not use this file except in compliance
#    with the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing,
#    software distributed under the License is distributed on an
#    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
#    KIND, either express or implied.  See the License for the
#    specific language governing permissions and limitations
#    under the License.
# ====================================================================
# $HeadURL: https://svn.apache.org/repos/asf/subversion/branches/1.14.x/tools/backup/hot-backup.py.in $
# $LastChangedDate: 2020-11-14 04:00:12 +0000 (Sat, 14 Nov 2020) $
# $LastChangedBy: svn-role $
# $LastChangedRevision: 1883406 $
######################################################################
import sys, os, getopt, stat, re, time, shutil, subprocess
import functools
######################################################################
# Global Settings
# Location of the svnlook executable
svnlook = "/usr/bin/svnlook"
# Location of the svnadmin executable
svnadmin = "/usr/bin/svnadmin"
# How many backups to retain by default (0 means "keep them all").  The
# SVN_HOTBACKUP_BACKUPS_NUMBER environment variable, when set, replaces
# the built-in value of 64.
num_backups = int(os.environ.get("SVN_HOTBACKUP_BACKUPS_NUMBER", 64))
# File name extension used for each supported archive type
archive_map = dict(
  gz=".tar.gz",
  bz2=".tar.bz2",
  zip=".zip",
  zip64=".zip",
)
# Recursively adjust permission bits below a directory
def chmod_tree(path, mode, mask):
  """Change the permissions of every entry found beneath PATH.

  For each directory and file reached while walking PATH, the bits in
  MASK are cleared from its current mode and the bits in MODE are then
  set.  Symbolic links are skipped, and PATH itself is left untouched."""
  for parent, subdirs, filenames in os.walk(path):
    for entry in subdirs + filenames:
      target = os.path.join(parent, entry)
      if os.path.islink(target):
        continue
      current = os.stat(target).st_mode
      os.chmod(target, (current & ~mask) | mode)
# For clearing away read-only directories
def safe_rmtree(dirname, retry=0):
  """Remove the tree at DIRNAME, making it writable first.

  Does nothing if DIRNAME does not exist.  If RETRY is true, a failed
  removal is attempted again after pauses of 0.5, 1, 2 and 4 seconds,
  followed by one last attempt whose error, if any, propagates to the
  caller.  If RETRY is false a single attempt is made."""
  def rmtree(dirname):
    chmod_tree(dirname, 0o666, 0o666)
    shutil.rmtree(dirname)
  if not os.path.exists(dirname):
    return
  if retry:
    for delay in (0.5, 1, 2, 4):
      try:
        rmtree(dirname)
        return
      except Exception:
        # Only ordinary errors are worth retrying; KeyboardInterrupt and
        # SystemExit must not be swallowed by the retry loop.
        time.sleep(delay)
  # Either no retries were requested or all of them failed: make the
  # (final) attempt and let any error surface.
  rmtree(dirname)
######################################################################
# Command line arguments
def usage(out = sys.stdout):
  """Write the command-line help text to OUT (standard output by default).

  The program name shown in the text is the basename of sys.argv[0]."""
  program = os.path.basename(sys.argv[0])
  help_text = """USAGE: %s [OPTIONS] REPOS_PATH BACKUP_PATH
Create a backup of the repository at REPOS_PATH in a subdirectory of
the BACKUP_PATH location, named after the youngest revision.
Options:
  --archive-type=FMT Create an archive of the backup. FMT can be one of:
                       bz2  : Creates a bzip2 compressed tar file.
                       gz   : Creates a gzip compressed tar file.
                       zip  : Creates a compressed zip file.
                       zip64: Creates a zip64 file (can be > 2GB).
  --num-backups=N    Number of prior backups to keep around (0 to keep all).
  --verify           Verify the backup.
  --help      -h     Print this help message and exit.
""" % (program,)
  out.write(help_text)
# Parse the command line.  Options may appear before or after the two
# positional arguments (GNU-style scanning).
try:
  opts, args = getopt.gnu_getopt(sys.argv[1:], "h?", ["archive-type=",
                                                      "num-backups=",
                                                      "verify",
                                                      "help"])
except getopt.GetoptError as e:
  sys.stderr.write("ERROR: %s\n\n" % e)
  sys.stderr.flush()
  usage(sys.stderr)
  sys.exit(2)
# Option values; --num-backups replaces the module-level default.
archive_type = None
verify_copy = False
for o, a in opts:
  if o == "--archive-type":
    archive_type = a
  elif o == "--num-backups":
    try:
      num_backups = int(a)
    except ValueError:
      # Report a malformed count like any other usage error rather than
      # dying with a traceback.
      sys.stderr.write("ERROR: Bad --num-backups value '%s'\n\n" % a)
      sys.stderr.flush()
      usage(sys.stderr)
      sys.exit(2)
  elif o == "--verify":
    verify_copy = True
  elif o in ("-h", "--help", "-?"):
    usage()
    sys.exit()
# archive_map is the single source of truth for the supported formats,
# so an archive type that passes this check always has an extension.
if archive_type is not None and archive_type not in archive_map:
  sys.stderr.write("ERROR: Bad --archive-type\n")
  usage(sys.stderr)
  sys.exit(2)
if len(args) != 2:
  sys.stderr.write("ERROR: only two arguments allowed.\n\n")
  sys.stderr.flush()
  usage(sys.stderr)
  sys.exit(2)
# Path to repository
repo_dir = args[0]
# Name of the repository: the last component of its absolute path.
repo = os.path.basename(os.path.abspath(repo_dir))
# Where to store the repository backup.  The backup will be placed in
# a *subdirectory* of this location, named after the youngest
# revision.
backup_dir = args[1]
# Added to the filename regexp, set when using --archive-type, so that
# archives of earlier backups are found in addition to directories.
ext_re = ""
if archive_type:
  ext_re = "(" + re.escape(archive_map[archive_type]) + ")?"
######################################################################
# Helper functions
def comparator(a, b, ext_pattern=None, prefix=None):
  """Order the backup names A and B from oldest to newest.

  Names have the form PREFIX-REVISION[-INCREMENT][EXT].  They are
  ordered by revision number and then by increment; a name without an
  increment sorts before any name with one.  Returns -1, 0 or 1, which
  makes the function suitable for functools.cmp_to_key.  0 is returned
  when both names carry the same revision and increment (for example a
  backup directory and the archive made from it).

  EXT_PATTERN is the regular expression fragment matching the optional
  archive extension and PREFIX is the repository name; when omitted
  they default to the module-level ext_re and repo.  Raises ValueError
  if a name does not have the expected form."""
  if ext_pattern is None:
    ext_pattern = ext_re
  if prefix is None:
    prefix = repo
  # Anchor on the repository name: an unanchored search would misread
  # "name-9-5" (repository "name-9", revision 5) as revision 9 with
  # increment 5 and sort incremented backups in the wrong place.
  regexp = re.compile("^" + re.escape(prefix) +
                      "-(?P<revision>[0-9]+)(-(?P<increment>[0-9]+))?" +
                      ext_pattern + "$")
  def sort_key(name):
    match = regexp.search(name)
    if match is None:
      raise ValueError("Not a backup name for '%s': '%s'" % (prefix, name))
    increment = match.group('increment')
    # -1 places "no increment" ahead of every real increment.
    return (int(match.group('revision')),
            -1 if increment is None else int(increment))
  key_a = sort_key(a)
  key_b = sort_key(b)
  return (key_a > key_b) - (key_a < key_b)
def get_youngest_revision():
  """Examine the repository REPO_DIR using the svnlook binary
  specified by SVNLOOK, and return the youngest revision.

  The revision is returned as a string.  Raises Exception if svnlook
  writes anything to standard error or prints no revision at all."""
  p = subprocess.Popen([svnlook, 'youngest', '--', repo_dir],
                       stdin=subprocess.PIPE,
                       stdout=subprocess.PIPE,
                       stderr=subprocess.PIPE)
  # communicate() closes stdin, drains both output pipes together (so a
  # chatty stderr cannot block the child) and waits for the child to exit.
  stdout_data, stderr_data = p.communicate()
  stderr_lines = stderr_data.splitlines()
  if stderr_lines:
    # Decode so the message shows the text itself, not a bytes repr.
    first_error = stderr_lines[0].rstrip().decode(errors='replace')
    raise Exception("Unable to find the youngest revision for repository '%s'"
                    ": %s" % (repo_dir, first_error))
  stdout_lines = stdout_data.splitlines()
  if not stdout_lines:
    raise Exception("Unable to find the youngest revision for repository '%s'"
                    ": svnlook produced no output" % (repo_dir,))
  return stdout_lines[0].strip().decode()
######################################################################
# Main
print("Beginning hot backup of '"+ repo_dir + "'.")
### Step 1: ask the repository for its youngest revision; give up with
###         exit status 1 if that fails.
try:
  youngest = get_youngest_revision()
except Exception as err:
  sys.stderr.write("%s\n" % err)
  sys.stderr.flush()
  sys.exit(1)
else:
  print("Youngest revision is %s" % youngest)
### Step 2: Pick a backup path that is not taken yet
# An existing backup of this revision does not make a new one redundant:
# the repository might have changed despite no new revision having been
# created.  In that case the new backup gets the next increment appended
# to its name.  The next increment is one more than the highest existing
# one (rather than the first free number counting up from 1) because the
# lowest increments may already have been removed due to num_backups.
regexp = re.compile("^" + re.escape(repo) + "-" + re.escape(youngest) +
                    "(-(?P<increment>[0-9]+))?" + ext_re + "$")
directory_list = os.listdir(backup_dir)
young_list = sorted((x for x in directory_list if regexp.search(x)),
                    key = functools.cmp_to_key(comparator))
if not young_list:
  # First backup of this revision: no increment needed.
  backup_subdir = os.path.join(backup_dir, repo + "-" + youngest)
else:
  # The last entry after sorting carries the highest increment, if any.
  increment = regexp.search(young_list.pop()).group('increment')
  next_increment = int(increment) + 1 if increment else 1
  backup_subdir = os.path.join(backup_dir, repo + "-" + youngest + "-"
                               + str(next_increment))
### Step 3: Ask subversion to make a hot copy of the repository; a
###         non-zero svnadmin exit status aborts the script with that
###         same status.
print("Backing up repository to '" + backup_subdir + "'...")
hotcopy_command = [svnadmin, "hotcopy", "--clean-logs",
                   '--', repo_dir, backup_subdir]
err_code = subprocess.call(hotcopy_command)
if err_code:
  sys.stderr.write("Unable to backup the repository.\n")
  sys.stderr.flush()
  sys.exit(err_code)
print("Done.")
### Step 4: Verify the hotcopy (only when --verify was given); a failed
###         verification aborts the script with svnadmin's exit status.
if verify_copy:
  print("Verifying backup...")
  verify_command = [svnadmin, "verify", "--quiet", '--', backup_subdir]
  err_code = subprocess.call(verify_command)
  if err_code:
    sys.stderr.write("Backup verification failed.\n")
    sys.stderr.flush()
    sys.exit(err_code)
  print("Done.")
### Step 5: Make an archive of the backup if required.  On success the
###         unpacked backup directory is removed; on failure the script
###         exits with a negative status identifying the failed stage.
if archive_type:
  archive_path = backup_subdir + archive_map[archive_type]
  err_msg = ""
  # Start from a clean status so only a failure below is reported.
  err_code = 0
  print("Archiving backup to '" + archive_path + "'...")
  if archive_type == 'gz' or archive_type == 'bz2':
    try:
      import tarfile
      # The context manager closes the archive even if adding fails.
      with tarfile.open(archive_path, 'w:' + archive_type) as tar:
        tar.add(backup_subdir, os.path.basename(backup_subdir))
    except ImportError as e:
      err_msg = "Import failed: " + str(e)
      err_code = -2
    except tarfile.TarError as e:
      err_msg = "Tar failed: " + str(e)
      err_code = -3
  elif archive_type == 'zip' or archive_type == 'zip64':
    try:
      import zipfile

      def add_to_zip(zp, root, dirname, names):
        """Add the entries NAMES of directory DIRNAME to the zip file ZP.

        Regular files are stored under their path relative to ROOT.
        Symlinked directories, which os.walk does not descend into,
        are walked here so that their files are stored as well.
        NOTE: plain directories are not written themselves, so empty
        directories do not appear in the archive."""
        root = os.path.join(root, '')

        for file in names:
          path = os.path.join(dirname, file)
          if os.path.isfile(path):
            zp.write(path, path[len(root):])
          elif os.path.isdir(path) and os.path.islink(path):
            for dirpath, dirs, files in os.walk(path):
              # Keep ROOT unchanged so these files retain their full
              # relative path instead of landing at the archive's top.
              add_to_zip(zp, root, dirpath, dirs + files)

      # The last argument enables zip64 extensions only for 'zip64'.
      with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED,
                           archive_type == 'zip64') as zp:
        for dirpath, dirs, files in os.walk(backup_subdir):
          add_to_zip(zp, backup_dir, dirpath, dirs + files)
    except ImportError as e:
      err_msg = "Import failed: " + str(e)
      err_code = -4
    except (zipfile.error, zipfile.LargeZipFile) as e:
      # LargeZipFile is raised when the data needs zip64 but the plain
      # 'zip' type was requested; report it like any other zip failure.
      err_msg = "Zip failed: " + str(e)
      err_code = -5
  if err_code != 0:
    sys.stderr.write("Unable to create an archive for the backup.\n%s\n" % err_msg)
    sys.stderr.flush()
    sys.exit(err_code)
  else:
    print("Archive created, removing backup '" + backup_subdir + "'...")
    safe_rmtree(backup_subdir, 1)
### Step 6: finally, prune the repository's backups so that only the
###         newest NUM_BACKUPS remain (0 or less keeps everything).
if num_backups > 0:
  regexp = re.compile("^" + re.escape(repo) + "-[0-9]+(-[0-9]+)?" + ext_re + "$")
  directory_list = os.listdir(backup_dir)
  old_list = sorted((x for x in directory_list if regexp.search(x)),
                    key = functools.cmp_to_key(comparator))
  # Everything ahead of the last NUM_BACKUPS entries is surplus.
  surplus = max(0, len(old_list) - num_backups)
  old_list = old_list[:surplus]
  for item in old_list:
    old_backup_item = os.path.join(backup_dir, item)
    print("Removing old backup: " + old_backup_item)
    if not os.path.isdir(old_backup_item):
      # Archives are plain files.
      os.remove(old_backup_item)
    else:
      safe_rmtree(old_backup_item, 1)