#!/usr/bin/python
# -*- coding: UTF-8 -*-

import sys
import os
import stat
import re
from optparse import Option, OptionParser

try:
    from spacewalk.common import checksum
    from spacewalk.common.rhnLog import initLOG, log_debug
    from spacewalk.common.rhnConfig import CFG, initCFG
    from spacewalk.server import rhnSQL
    from spacewalk.server.rhnPackage import unlink_package_file
except:
    _LIBPATH = "/usr/share/rhn"
    # add to the path if need be
    if _LIBPATH not in sys.path:
        sys.path.append(_LIBPATH)
    from common import CFG, initCFG, initLOG, log_debug
    from server import rhnSQL
    from server.rhnPackage import unlink_package_file

# File the script logs to (handed to initLOG() by the entry point).
LOG_FILE = '/var/log/rhn/spacewalk-data-fsck.log'

# Summary line templates keyed by check name; each one is formatted with the
# matching counter from `report`.
report_msg = dict(
    file="%5d files scanned",
    exists="ERROR: %5d files missing on disk",
    dbexists="ERROR: %5d files missing in database",
    restore="ERROR: %5d file paths could not be restored",
    size="ERROR: %5d file size mismatch(es)",
    checksum="ERROR: %5d file checksum mismatch(es)",
    nevrao="ERROR: %5d file NEVRAO mismatch(es)",
)

# Counters per check name; zeroed at the start of every scan.
report = dict()

def is_sha256_capable():
    """Tell whether the installed spacewalk-schema is newer than 0.8.1-1.

    package_query() uses the answer to choose between reading checksums
    through rhnChecksumView and falling back to the rhnPackage.md5sum column.

    Returns:
        True when the first installed RPM header providing
        'spacewalk-schema' compares greater than version 0.8.1, release 1;
        False otherwise, including when nothing provides 'spacewalk-schema'.
    """
    import rpm
    ts = rpm.TransactionSet()
    # -1 ("older") is the verdict when no package provides the schema.
    comparison = -1
    # Only the first match is inspected.  A for loop drives the match
    # iterator on every Python version, unlike calling .next() on it.
    for header in ts.dbMatch('Providename', 'spacewalk-schema'):
        # The package name fills the first slot on both sides, so the
        # outcome is decided by version and release alone.
        comparison = rpm.labelCompare(
            [header['name'], header['version'], header['release']],
            [header['name'], '0.8.1', '1'])
        break
    return comparison > 0


def db_init():
    """Load the 'server' configuration component, then initialise rhnSQL."""
    component = 'server'
    initCFG(component)
    rhnSQL.initDB()

def package_query(options, bind_path=False):
    """Build the SELECT statement used to read package rows.

    Args:
        options: parsed command-line options; only the ``checksum`` and
            ``nevrao`` flags are read.
        bind_path: when true, a ``where p.path = :path`` clause is appended
            so the statement can be executed for a single path.

    Returns:
        The SQL text.  Id, org, size and path are always selected; checksum
        columns are added when either flag is set, and name/EVR/arch columns
        (with their joins) when ``nevrao`` is set.
    """
    template = """select %s
                 from %s
                     """
    select_list = ["p.id, p.org_id, p.package_size, p.path"]
    sources = ["rhnPackage p"]

    if options.checksum or options.nevrao:
        if not is_sha256_capable():
            # Older schema: the checksum lives directly on rhnPackage.
            select_list.append("p.md5sum as checksum, 'md5sum' as checksum_type")
        else:
            select_list.append("c.checksum, c.checksum_type")
            sources.append("join rhnChecksumView c on p.checksum_id = c.id")

    if options.nevrao:
        select_list.append(
            "n.name, evr.epoch, evr.version, evr.release, a.label as arch")
        sources.extend([
            "join rhnPackageName n on p.name_id = n.id",
            "join rhnPackageEVR evr on p.evr_id = evr.id",
            "join rhnPackageArch a on p.package_arch_id = a.id",
        ])

    if bind_path:
        template = template + "where p.path = :path"

    return template % (", ".join(select_list), " ".join(sources))

def check_db_vs_disk(options):
    """Check every package path stored in the database against the disk.

    For each row that has a path, the file under CFG.MOUNT_POINT is looked
    up.  A missing file is handed to not_on_disk(); otherwise the path is
    either restored (``options.restore``) or run through the size, NEVRAO
    and checksum checks that are enabled.  Counters are kept in the
    module-level ``report`` dict and a summary is logged at the end.

    Args:
        options: parsed command-line options.

    Returns:
        1 if any counter other than 'file' ended up above zero, else 0.
    """
    for key in report_msg.keys():
        report[key] = 0

    cursor = rhnSQL.prepare(package_query(options))
    mount_point = CFG.MOUNT_POINT
    cursor.execute()

    while True:
        row = cursor.fetchone_dict()
        if not row:
            break
        if not row['path']:
            # Rows without a stored path have nothing to verify.
            continue

        abs_path = os.path.join(mount_point, row['path'])
        log(3, abs_path)
        report['file'] += 1

        # check_disk_exists() is true when the file is NOT there.
        if check_disk_exists(abs_path):
            report['exists'] += 1
            not_on_disk(row, abs_path, options)
            continue

        if options.restore:
            # Restore mode replaces the regular checks.
            report['restore'] += check_disk_nevrao(abs_path, row.copy(), True)
            continue

        if options.size:
            report['size'] += check_disk_size(abs_path, row['package_size'])
        if options.nevrao:
            report['nevrao'] += check_disk_nevrao(abs_path, row.copy())
        if options.checksum:
            report['checksum'] += check_disk_checksum(
                abs_path, row['checksum_type'], row['checksum'])
    cursor.close()

    error_found = 0
    for check in ('file', 'exists', 'restore', 'size', 'nevrao', 'checksum'):
        count = report[check]
        if count <= 0:
            continue
        log(1, report_msg[check] % count)
        # The number of scanned files is informational, not an error.
        if check != 'file':
            error_found = 1
    return error_found

def check_disk_exists(abs_path):
    """Report whether *abs_path* is absent from disk.

    Despite the name, the result is True when the path is NOT an existing
    regular file, and False when it is one.
    """
    if os.path.isfile(abs_path):
        return False
    return True

def not_on_disk(row, abs_path, options):
    """Handle a package whose stored path has no file behind it.

    Args:
        row: database row of the package ('id' and 'path' are read).
        abs_path: absolute path that was looked up on disk.
        options: parsed command-line options; with ``remove`` set the
            stored path is cleared in rhnPackage and committed, otherwise
            the missing file is only logged.
    """
    if not options.remove:
        log(0, "File missing: %s" % abs_path)
        return

    log(0, "Removed package path from db: %s" % row['path'])
    rhnSQL.execute('update rhnPackage set path = NULL where id = :id', id=row['id'])
    rhnSQL.commit()

def check_disk_size(abs_path, size):
    """Compare the size of the file at *abs_path* with the expected *size*.

    Returns:
        0 when the sizes agree; 1 (after logging the mismatch) otherwise.
    """
    actual = os.path.getsize(abs_path)
    if actual == size:
        return 0
    log(0, "File size mismatch: %s (%s vs. %s)" % (abs_path, size, actual))
    return 1

def check_disk_nevrao(abs_path, row, restore=None):
    """Compare the components of a stored package path with its metadata.

    The stored path is expected to consist of eight '/'-separated
    components::

        <top dir>/<org_id>/<checksum prefix>/<name>/<evr>/<arch>/<checksum>/<basename>

    Every component after the first is rebuilt from the database columns
    and compared with what the path actually contains.

    Args:
        abs_path: absolute path of the file; used in log messages only.
        row: dict-like database row holding 'path', 'org_id', 'checksum',
            'name', 'epoch', 'version', 'release' and 'arch' (plus 'id'
            when restoring).  It is modified in place: 'org_id' becomes a
            string and 'checksum_prefix', 'evr' and 'basename' are added,
            so callers should pass a copy.
        restore: when true, the first mismatch triggers
            restore_file_path() and its outcome is returned.

    Returns:
        0 when every component matches (or the path was restored), 1 when a
        mismatch was found, the path does not have the expected layout, or
        the restoration failed.
    """
    keys = ('org_id', 'checksum_prefix', 'name', 'evr', 'arch', 'checksum',
            'basename')
    parts = row['path'].split('/')
    if len(parts) != len(keys) + 1:
        # A path with a different depth cannot be compared component by
        # component; report it instead of aborting the whole scan with a
        # ValueError from tuple unpacking.
        log(0, "File path mismatch: %s (unexpected path layout: %s)" %
            (abs_path, row['path']))
        return 1
    # parts[0] is the top-level directory, which is not compared.
    on_disk = dict(zip(keys, parts[1:]))

    if not row['org_id']:
        row['org_id'] = 'NULL'
    else:
        row['org_id'] = str(row['org_id'])
    row['checksum_prefix'] = row['checksum'][:3]
    if row['epoch']:
        # %-formatting also copes with an epoch that is not a string.
        row['evr'] = '%s:' % row['epoch']
    else:
        row['evr'] = ''
    row['evr'] += row['version'] + '-' + row['release']
    # The extension is taken from the last three characters of the path.
    row['basename'] = "%s-%s-%s.%s.%s" % (
        row['name'], row['version'], row['release'],
        row['arch'], row['path'][-3:])

    ret = 0
    for key in keys:
        if on_disk[key] == row[key]:
            continue
        log(0, "File path mismatch: %s (%s: %s vs. %s)" %
            (abs_path, key, row[key], on_disk[key]))
        if restore:
            log(2, "Begining file path restoration: %s" % abs_path)
            result = restore_file_path(on_disk, row)
            # A None result is treated as a failure so the caller can
            # always add the return value to its counter.
            if result is None:
                return 1
            return result
        ret = 1
    return ret


def restore_file_path(file, row):
    """Move a package file to the path implied by its database metadata.

    The new location is created as a hard link to the old file, the
    rhnPackage row is pointed at it and committed, and only then is the old
    file (with any directories left empty) removed.

    Args:
        file: unused; kept so existing callers keep working.
        row: package row as prepared by check_disk_nevrao().  It must hold
            'id' and 'path' plus the string values 'org_id',
            'checksum_prefix', 'name', 'evr', 'arch', 'checksum' and
            'basename'.

    Returns:
        0 when the path was restored, 1 when the target directories or the
        hard link could not be created (nothing is changed in the database
        in that case).
    """
    import errno

    prefix = CFG.MOUNT_POINT + '/'
    old_abs_path = prefix + row['path']
    new_rel_path = '/'.join((
        CFG.PREPENDED_DIR, row['org_id'], row['checksum_prefix'], row['name'],
        row['evr'], row['arch'], row['checksum'], row['basename']))
    new_abs_path = prefix + new_rel_path
    new_dir = new_abs_path.rsplit('/', 1)[0]

    # sys.exc_info() is used instead of binding the exception in the except
    # clause so the handlers parse on every Python version.

    # 1st: create the target directory.  Remember whether this call
    # created it, so a rollback never removes a pre-existing directory.
    created_dirs = True
    try:
        os.makedirs(new_dir)
    except OSError:
        if sys.exc_info()[1].errno != errno.EEXIST:
            log(0, "File path restoration failed: %s" % row['path'])
            return 1
        created_dirs = False

    # 2nd: hard-link the file into place; the old path stays valid until
    # the database has been updated.
    try:
        os.link(old_abs_path, new_abs_path)
    except OSError:
        log(0, "File path restoration failed, rolling back: %s" % row['path'])
        if created_dirs:
            try:
                os.removedirs(new_dir)
            except OSError:
                if sys.exc_info()[1].errno != errno.ENOTEMPTY:
                    log(0, "Could not rollback file path restoration: %s" % row['path'])
        # Every failure path reports 1 so callers can sum the result.
        return 1

    # 3rd: point the database at the new location, using bind variables
    # rather than pasting values into the statement.
    h = rhnSQL.prepare('update rhnPackage set path = :path where id = :id')
    h.execute(path=new_rel_path, id=row['id'])
    rhnSQL.commit()

    # 4th: drop the old file and any directories left empty.  Failures here
    # are only logged because the restoration itself is already committed.
    try:
        os.remove(old_abs_path)
        os.removedirs(prefix + row['path'].rsplit('/', 1)[0])
    except OSError:
        err = sys.exc_info()[1]
        if err.errno == errno.ENOENT:
            log(2, "File %s%s was removed by someone else but transaction has been successfully completed" % (prefix, row['path']))
        elif err.errno != errno.ENOTEMPTY:
            log(4, "Could not clean up old file path %s%s: %s" % (prefix, row['path'], err))
    log(0, "File %s%s successfully restored, new file path: %s" % (prefix, row['path'], new_abs_path))
    return 0

def check_disk_checksum(abs_path, checksum_type, db_checksum):
    """Compare the checksum of the file at *abs_path* with the stored one.

    Reads the module-level ``options`` object (set by the script entry
    point) to decide whether a mismatching file is removed.

    Args:
        abs_path: absolute path of the file to hash.
        checksum_type: checksum algorithm name passed to
            checksum.getHashlibInstance().
        db_checksum: hex digest recorded in the database.

    Returns:
        0 when the computed digest equals *db_checksum*, 1 otherwise.  A
        file whose digest could not be computed counts as a mismatch but is
        never removed, because the mismatch is not proven.
    """
    file_checksum = None
    try:
        fp = open(abs_path, 'rb')
        try:
            h = checksum.getHashlibInstance(checksum_type, False)
            # Hash in chunks so large packages are not read into memory
            # in one piece.
            while True:
                chunk = fp.read(65536)
                if not chunk:
                    break
                h.update(chunk)
            file_checksum = h.hexdigest()
        finally:
            fp.close()
    except Exception:
        # Keep scanning, but say why the digest is unavailable instead of
        # hiding the cause behind a bare "vs. None" mismatch.
        log(0, "Could not compute checksum of %s: %s" %
            (abs_path, sys.exc_info()[1]))

    if file_checksum == db_checksum:
        return 0

    log(0, "File checksum mismatch: %s (%s: %s vs. %s)" %
        (abs_path, checksum_type, db_checksum, file_checksum))
    if options.remove_mismatch and file_checksum is not None:
        remove_mismatch(abs_path, options)
    return 1

def check_disk_vs_db(disk_content=None, db_content=None):
    """Check every file below the package directory against the database.

    Walks CFG.MOUNT_POINT/CFG.PREPENDED_DIR and looks each file up in
    rhnPackage by its path relative to CFG.MOUNT_POINT.  Files without a
    row are handed to not_in_db(); files with a row get the enabled size,
    NEVRAO and checksum checks, but only when ``options.fs_only`` is set.
    Settings come from the module-level ``options`` object.

    Args:
        disk_content: not used by this function.
        db_content: not used by this function.

    Returns:
        1 if any counter other than 'file' ended up above zero, else 0.
    """
    for key in report_msg.keys():
        report[key] = 0

    cursor = rhnSQL.prepare(package_query(options, bind_path=True))
    scan_root = os.path.join(CFG.MOUNT_POINT, CFG.PREPENDED_DIR)
    for root, dirs, files in os.walk(scan_root):
        # Strip the mount point and the following separator to get the
        # path form used for the database lookup.
        rel_root = root[len(CFG.MOUNT_POINT) + 1:]
        for name in files:
            abs_path = os.path.join(root, name)
            rel_path = os.path.join(rel_root, name)
            log(3, abs_path)
            report['file'] += 1

            cursor.execute(path=rel_path)
            row = cursor.fetchone_dict()
            if not row:
                report['dbexists'] += 1
                not_in_db(abs_path, options)
                continue

            if not options.fs_only:
                continue
            if options.size:
                report['size'] += check_disk_size(abs_path, row['package_size'])
            if options.nevrao:
                report['nevrao'] += check_disk_nevrao(abs_path, row.copy())
            if options.checksum:
                report['checksum'] += check_disk_checksum(
                    abs_path, row['checksum_type'], row['checksum'])
    cursor.close()

    error_found = 0
    for check in ('file', 'dbexists', 'size', 'nevrao', 'checksum'):
        count = report[check]
        if count <= 0:
            continue
        log(1, report_msg[check] % count)
        # The number of scanned files is informational, not an error.
        if check != 'file':
            error_found = 1
    return error_found

def not_in_db(path, options):
    """Handle a file on disk that has no matching database row.

    With ``options.remove`` set the file is passed to
    unlink_package_file(); otherwise it is only logged.
    """
    if not options.remove:
        log(0, "File missing in db: %s" % path)
        return

    log(0, "Removed file missing in db: %s" % path)
    unlink_package_file(path)

def remove_mismatch(path, options):
    """Log and unlink a file whose checksum differs from the database.

    ``options`` is accepted but not read.
    """
    message = "Removed file because checksum does not match: %s" % path
    log(0, message)
    unlink_package_file(path)

def log(level, *args):
    """Send a message to the log file and, if verbose enough, to stdout.

    The message always goes to log_debug(); it is also printed (arguments
    joined with ', ') when the module-level ``options.verbose`` count is at
    least *level*.  An unset verbosity counts as 0.
    """
    log_debug(level, *args)
    threshold = options.verbose or 0
    if level <= threshold:
        print (', '.join([str(arg) for arg in args]))

# Script entry point: parse the options, initialise logging and the
# database, then run the database-to-disk and/or disk-to-database pass.
# The exit status is the number of passes that found errors.
if __name__ == '__main__':

    options_table = [
        Option("-v", "--verbose",       action="count",
            help="Increase verbosity"),
        Option("-S", "--no-size",       action="store_false", dest="size", default=True,
            help="Don't check package size"),
        Option("-C", "--no-checksum",   action="store_false", dest="checksum", default=True,
            help="Don't check package checksum"),
        Option("-O", "--no-nevrao",     action="store_false", dest="nevrao", default=True,
            help="Don't check package name, epoch, version, release, arch, org"),
        Option("-d", "--db-only",       action="store_true",
            help="Check only if packages from database are present on filesystem"),
        Option("-f", "--fs-only",       action="store_true",
            help="Check only if packages from filesystem are in the database"),
        Option("-r", "--remove",        action="store_true",
            help="Automaticaly remove packages from filesystem not present in database"),
        Option("-R", "--remove-mismatch", action="store_true", dest="remove_mismatch", default=False,
            help="Automatically remove packages from filesystem that does not match the checksum stored in database. (not valid with --db-only)"),
        Option("-F", "--fix-file-path", action="store_true", dest="restore", default=False,
            help="Restores file paths, try this when you have NEVRAO mismatches. Do not run this command with another commands"),
    ]
    parser = OptionParser(option_list=options_table)
    # `options` is deliberately module-level: log(), check_disk_checksum()
    # and check_disk_vs_db() read it as a global.
    (options, args) = parser.parse_args()

    # --verbose is a count and stays None when the flag is not given.
    initLOG(LOG_FILE, options.verbose or 0)

    db_init()

    exit_value = 0
    if options.remove_mismatch:
        if options.db_only or not options.checksum:
            log(0, "Syntax error: The option --remove-mismatch cannot be used with --db-only or --no-checksum")
            sys.exit(1)
    if options.restore and not options.nevrao:
        # Path restoration compares name/epoch/version/release/arch, which
        # package_query() only selects when the NEVRAO check is enabled;
        # without them check_disk_nevrao() would die with a KeyError.
        log(0, "Syntax error: The option --fix-file-path cannot be used with --no-nevrao")
        sys.exit(1)
    if not options.fs_only:
        log(1, "Checking if packages from database are present on filesystem")
        exit_value += check_db_vs_disk(options)
    if not options.db_only:
        log(1, "Checking if packages from filesystem are present in database")
        # The argument lands in check_disk_vs_db()'s unused disk_content
        # parameter; that function reads the module-level `options`.
        exit_value += check_disk_vs_db(options)

    sys.exit(exit_value)
