#!/usr/bin/python
#
# Utility for archiving/purging audit-log tables
#
# Copyright (c) 2013 Red Hat, Inc.
#
#
# This software is licensed to you under the GNU General Public License,
# version 2 (GPLv2). There is NO WARRANTY for this software, express or
# implied, including the implied warranties of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. You should have received a copy of GPLv2
# along with this software; if not, see
# http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
#
# Red Hat trademarks are not licensed under GPLv2. No permission is
# granted to use or replicate Red Hat trademarks that are incorporated
# in this software or its documentation.
#

"""spacewalk-archive-audits - a tool for extracting and purging the _log tables in the SW database"""

import csv
import os
import sys
import time

from os import getenv
from optparse import OptionParser, OptionGroup
from csv import writer
from datetime import datetime

try:
    from spacewalk.common import rhnLog
    from spacewalk.common.rhnConfig import CFG, initCFG
    from spacewalk.common.rhnLog import LOG, log_clean
    from spacewalk.server import rhnSQL
except ImportError:
    # pylint: disable=F0401
    _LIBPATH = '/usr/share/rhn'
    if _LIBPATH not in sys.path:
        sys.path.append(_LIBPATH)
    from server import rhnSQL
    from common import rhnLog
    from common.rhnLog import LOG, log_clean
    from common.rhnConfig import CFG, initCFG

LOG_LOCATION = '/var/log/rhn/archive-audit-tables.log'
LOG_COLS = ['id', 'stamp', 'user_id']

SQL_PIECES = {'oracle': {
                  'COUNT' : 'select count(*) from %(table)s',
                  'FIND_COLS' : "select column_name from user_tab_columns where table_name = :the_table",
                  'FIND_TABLE' : "select table_name from user_tables where table_name like '%|_LOG' escape '|'",
                  'GET_DATA' : "select %(cols)s, %(tcols)s from %(tname)s t inner join log l on l.id = t.log_id where l.stamp <= to_date(:tstamp, 'YYYY-MM-DD hh24:mi:ss') order by l.id",
                  'PURGE_DATA' : """
                    delete %(ptable)s
                     where exists (
                       select *
                         from log l
                        where l.id = %(ptable)s.log_id
                          and l.stamp <= to_date(:tstamp, 'YYYY-MM-DD hh24:mi:ss')
                       )
                  """,
                  'PURGE_LOG_DATA' : "delete from log l where l.stamp <= to_date(:tstamp, 'YYYY-MM-DD hh24:mi:ss')"
                },
              'postgresql' : {
                  'COUNT' : 'select count(*) from %(table)s;',
                  'FIND_COLS' : 'select column_name from information_schema.columns where table_name = :the_table;',
                  'FIND_TABLE' : "select table_name from information_schema.tables where table_name like '%!_log' escape '!';",
                  'GET_DATA' : 'select %(cols)s, %(tcols)s from %(tname)s t inner join log l on l.id = t.log_id where l.stamp <= (timestamp :tstamp) order by l.id;',
                  'PURGE_DATA' : 'delete from %(ptable)s t using log l where l.id = t.log_id and l.stamp <= (timestamp :tstamp);',
                  'PURGE_LOG_DATA' : 'delete from log l where l.stamp <= (timestamp :tstamp);'
                }
              }

def setup_options():
    usage = 'usage: %prog --list --archive --purge --timestamp'
    parser = OptionParser(usage=usage)

    parser.add_option('-l', '--list', action='store_true', dest='list',
            help='list number of log entries before --timestamp (or current size if --timestamp not specified), of all *_log tables')
    parser.add_option('-a', '--archive', action='store_true', default=False, dest='archive',
            help='archive as CSV the contents of all *_log tables')
    parser.add_option('-p', '--purge', action='store_true', dest='purge',
            help='purge all entries made on or before --timestamp')
    parser.add_option('-t', '--timestamp', action='store', dest='purge_date', metavar='YYYY-MM-DD hh24:mi:ss',
            help='all entries made at or before this time will be purged')

    return parser

def validate_options():
    parser = setup_options()
    (options, args) = parser.parse_args()
    if not (options.list or options.archive or options.purge):
        parser.error("At least one of --list, --archive, --purge is required")

    if (options.archive or options.purge) and not options.purge_date:
        parser.error("--timestamp is required for --archive and --purge")

    if options.purge_date:
        try:
            time.strptime(options.purge_date, '%Y-%m-%d %H:%M:%S')
        except ValueError:
            parser.error('--timestamp [' + options.purge_date + '] not in format YYYY-MM-DD hh24:mi:ss')

    return (options, args)

def _log(stmt):
    logger = os.getenv("SUDO_USER")
    if logger == None:
        logger = 'root'
    log_clean(0, '%s|%s|%s' % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), logger, stmt))

def _get_sql(key):
    return SQL_PIECES.get(CFG.DB_BACKEND).get(key)

def _get_allcols(name):
    all_cols = []
    all_cols.extend(['l'+x for x in LOG_COLS])
    all_cols.extend(columns(name))
    return all_cols

def _get_data_statement(name):
    cols = ', '.join(['l.'+x+' l'+x for x in LOG_COLS])
    tcols = ', '.join(['t.'+x for x in columns(name)])
    data_stmt = _get_sql('GET_DATA') % {"cols":cols, "tcols":tcols, "tname":name}
    return data_stmt

def tables():
    """What are the tables whose names fit the pattern *_log?"""
    stmt = rhnSQL.execute(_get_sql('FIND_TABLE'))
    table_names = stmt.fetchall() or []
    rc = []
    for name in table_names:
        rc.append(name[0])
    return rc

def columns(tbl):
    """What are the columns of table <tbl>?"""
    stmt = rhnSQL.execute(_get_sql('FIND_COLS'), the_table=tbl)
    col_names = stmt.fetchall() or []
    rc = []
    for name in col_names:
        rc.append(name[0])
    return rc

def count(tbl):
    """How many rows are in table <tbl>?"""
    tbl_str = _get_sql('COUNT') % {"table":tbl}
    stmt = rhnSQL.execute(tbl_str)
    num_rows = stmt.fetchone()
    return num_rows[0]

def list(opt):
    """List current state of all *_log tables found"""
    names = tables()
    for name in names:
        print '%32s : %d' % (name, count(name))

def archive(opt):
    """Grab the requested data and store it in a .csv file 'here'"""
    _log('Archiving everything before ' + opt.purge_date)
    names = tables()
    for name in names:
        rows = 0
        outfilename = name+'.'+(datetime.now().strftime('%Y-%m-%d_%H:%M:%S'))+'.csv'

        try:
            f = open(outfilename, 'wb')
        except IOError, e:
            print 'FAILED to open output file ['+outfilename+']'
            sys.exit(1)

        writer = csv.writer(f)
        hdr_names = _get_allcols(name)
        writer.writerow(hdr_names)
        stmt = rhnSQL.execute(_get_data_statement(name), tstamp=opt.purge_date)
        rslts = None
        while 1:
            rslts = stmt.fetchone_dict()
            if not rslts:
                break
            row = []
            for col in hdr_names:
                row.append(rslts.get(col))
            writer.writerow(row)
            rows += 1
        print '%32s : %d rows archived' % (name, rows)
        f.close()

def purge(opt):
    """Remove from the _log tables anything that happened before <timestamp>"""
    _log('Purging everything before ' + opt.purge_date)
    names = tables()
    for name in names:
        del_str = _get_sql('PURGE_DATA') % {"ptable":name}
        stmt = rhnSQL.execute(del_str, tstamp=opt.purge_date)
        print '%32s : %s rows remain' % (name, count(name))
        _log('%32s : %s rows remain' % (name, count(name)))
    rhnSQL.execute(_get_sql('PURGE_LOG_DATA'), tstamp=opt.purge_date)
    return

if __name__ == '__main__':
    (options, args) = validate_options()
    initCFG('server')
    rhnSQL.initDB()

    if options.list:
        list(options)
        sys.exit(0)

    rhnLog.initLOG(LOG_LOCATION)
    if rhnLog.LOG.real == 0:
        print "Failed to access logfile " + LOG_LOCATION + " - exiting..."
        print "(Try running as sudo)"
        sys.exit(1)

    if options.purge:
        print 'Starting counts:'
        list(options)
        print '...archiving'
        rhnSQL.transaction('PURGE')
        archive(options)
        print '...purging'
        purge(options)
        rhnSQL.commit()
    elif options.archive:
        print 'Archiving...'
        archive(options)
