#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright © 2018 SUSE LLC
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; version 2.1.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Author: Zoltán Balogh <zbalogh@suse.com>
# Summary: Simple tool to list changelogs of packages in the available
# repositories.

from __future__ import print_function
import os
import argparse
import sys
import datetime
import re
import subprocess
from argparse import ArgumentParser
import requests
import rpm
import xml.etree.ElementTree as ET
import tempfile
import difflib


def log_text(text):
    """Emit *text* on standard output, followed by a newline."""
    print(text, file=sys.stdout)


def parse_args():
    """Build the command line parser of zypper-changelog.

    When the script was started without any command line argument the
    usage help is printed and the process exits.

    Returns:
        The configured ``ArgumentParser``. Note that the parser itself is
        returned, not the parsed arguments; the caller is expected to call
        ``parse_args()`` on it.
    """
    p = ArgumentParser(prog='zypper-changelog',
                       description='Shows the changelog '
                                   'of one or more packages.',
                       formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument('-d', '--debug',
                   default=False, action='store_true', dest='debug',
                   help='debug mode')
    p.add_argument('-c', '--commits',
                   default=False, action='store_true', dest='commits',
                   help='Lists only the headline of the change commits')
    p.add_argument('-e', '--expression',
                   default=False, action='store_true', dest='expression',
                   help='Enable regular expression in package name')
    p.add_argument("-p", "--packages", dest="packages", default='',
                   help="Package name or regular expression "
                        "to match packages.")
    p.add_argument("-r", "--repositories",
                   dest="repos", default="oss",
                   help="Comma separated list of repositories "
                        "to search for changelogs.")
    p.add_argument("-a", "--all",
                   dest="all", default=False,
                   action='store_true',
                   help="Lists changelogs for all packages")
    p.add_argument("-u", "--update",
                   dest="update", default=False,
                   action='store_true',
                   help="Lists changelogs for all packages to be updated")
    # Without any argument there is nothing to do: show the usage and quit
    if not sys.argv[1:]:
        p.print_help()
        p.exit()
    return p


def readRpmHeader(ts, filename):
    """Read an rpm header from a file.

    Args:
        ts: Object providing ``hdrFromFdno(fd)``; the script passes an
            ``rpm.TransactionSet``.
        filename: Path of the file holding the rpm header.

    Returns:
        Whatever ``ts.hdrFromFdno`` returns, or ``None`` when it raised
        ``rpm.error`` (the error is printed in that case).
    """
    fd = os.open(filename, os.O_RDONLY)
    try:
        return ts.hdrFromFdno(fd)
    except rpm.error as e:
        print(e)
        return None
    finally:
        # Close the descriptor on every path, including unexpected
        # exceptions, so that it is never leaked
        os.close(fd)


def get_updates():
    """Collect the pending updates reported by ``zypper -x list-updates``.

    Returns:
        A tuple ``(names, aliases, arch)`` where ``names`` is the set of
        the ``name`` attributes of all ``update`` elements, ``aliases`` is
        the set of the ``alias`` attributes of their ``source`` children
        and ``arch`` is the ``arch`` attribute of the last update that is
        neither ``src`` nor ``noarch`` (``''`` when there is none).
    """
    listing = subprocess.Popen(["zypper", "-x", "list-updates"],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    xml_output = listing.communicate()[0]

    names = set()
    aliases = set()
    arch = ''
    for entry in ET.fromstring(xml_output).iter('update'):
        names.add(entry.get('name'))
        aliases.add(entry.find('source').get('alias'))
        entry_arch = entry.get('arch')
        if entry_arch not in ('src', 'noarch'):
            arch = entry_arch
    return names, aliases, arch


def local_changelog(package):
    """Return the changelog of the locally installed *package*.

    ``rpm -q`` is asked for the package first. Multiple versions of the
    same package may be installed, so only the last line of its output is
    used and the changelog of that version is queried.

    Args:
        package: Name of the package.

    Returns:
        The decoded output of ``rpm -q --changelog``, or an empty string
        when the ``rpm -q`` output does not look like
        ``<package>-<version>-<rest>`` (for example when rpm reports that
        the package is not installed).
    """
    query_process = subprocess.Popen(["rpm",
                                      "-q",
                                      "%s" % package],
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
    stdout_value, stderr_value = query_process.communicate()
    newest = stdout_value.decode("utf-8").strip().split('\n')[-1]
    # The greedy group captures everything between "<package>-" and the
    # last hyphen of the line
    search_result = re.search("^%s-(.*)-.*" % re.escape(package), newest)
    if not search_result:
        # No version could be determined, so there is no local changelog
        return ''
    last_version = search_result.group(1)
    changelog_process = subprocess.Popen(["rpm",
                                          "-q",
                                          "--changelog",
                                          "%s-%s" % (package, last_version)],
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE)
    stdout_value, stderr_value = changelog_process.communicate()
    return stdout_value.decode("utf-8")


# Evaluate the command line
parser = parse_args()
args = parser.parse_args(sys.argv[1:])
if args.debug:
    log_text('\nargs: ' + str(sys.argv))

# In update mode the packages, the repositories and the architecture are
# taken from the pending updates; otherwise the repositories come from the
# command line.
if args.update:
    package_list, repository_list, arch = get_updates()
else:
    repository_list = args.repos.split(",")

# Packages named on the command line always win over the update list
if args.packages or not args.update:
    package_list = args.packages.split(",")

list_of_xml_files = []

# Find the cached primary.xml.gz files of the repositories.
# A cache directory is selected when any of the requested repository names
# occurs in its path; every file is recorded only once even if several
# names match, so that no repository is processed twice.
for cache_dir, _cache_subdirs, cache_files in os.walk("/var/cache/zypp/raw/"):
    if not any(repository in cache_dir for repository in repository_list):
        continue
    for cache_file in cache_files:
        if cache_file.endswith("primary.xml.gz"):
            list_of_xml_files.append(os.path.join(cache_dir, cache_file))

# The verification flags form a bit mask, so they have to be combined with
# the bitwise "|"; a boolean "or" would evaluate to the first non-zero flag
# only and drop the others.
# NOTE(review): the flags presumably relax signature/digest/payload checks
# because only the leading part of each package is downloaded - confirm
# against the rpm documentation.
ts = rpm.TransactionSet("", (rpm._RPMVSF_NOSIGNATURES |
                             rpm.RPMVSF_NOHDRCHK |
                             rpm._RPMVSF_NODIGESTS |
                             rpm.RPMVSF_NEEDPAYLOAD))

# Get the available repositories from the XML output of zypper
# and find the URLs of the repositories
zypp_process = subprocess.Popen(["zypper",
                                 "-x",
                                 "lr"],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
stdout_value, stderr_value = zypp_process.communicate()
repo_tree = ET.ElementTree(ET.fromstring(stdout_value))
repo_root = repo_tree.getroot()

# Namespace of the rpm specific elements of the repository cache files
rpm_xml_ns = 'http://linux.duke.edu/metadata/rpm'

# Compile the package name patterns once, not once per package entry
package_patterns = []
if args.expression and not args.all:
    package_patterns = [re.compile(package_item)
                        for package_item in package_list]

for files in list_of_xml_files:
    # Start from scratch for every cache file so that the URL of a
    # previously processed repository is never reused by accident
    mirror_url = None
    for repo in repo_root.iter('repo'):
        if repo.get('alias') in files:
            mirror_url = repo.find('url').text
    if mirror_url is None:
        log_text('No repository URL found for %s, skipping' % files)
        continue

    zcat_process = subprocess.Popen(["zcat",
                                     "%s" % files],
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
    stdout_value, stderr_value = zcat_process.communicate()

    tree = ET.ElementTree(ET.fromstring(stdout_value))
    root = tree.getroot()
    xml_ns = root.tag.split('}')[0].strip('{')

    # Loop through the packages listed in the repository cache file.
    # The children of a package element are addressed by position:
    # [0] is compared with the package names, [1] with the architecture,
    # [10] carries the href of the package and [11] the rpm:header-range.
    for package in root.findall('doc:package', namespaces={'doc': xml_ns}):
        if not args.all:
            # In update mode skip foreign arch and source packages
            if args.update and package[1].text not in ('%s' % arch, 'noarch'):
                continue
            if args.expression:
                if not any(pattern.match("%s" % package[0].text)
                           for pattern in package_patterns):
                    continue
            elif package[0].text not in package_list:
                continue
        # Find the segment/offset of the header part of the rpm package
        for field in package[11].findall('rpm:header-range',
                                         namespaces={'rpm': rpm_xml_ns}):
            end = field.get('end')
            url = '%s/%s' % (mirror_url, package[10].get('href'))
            log_text(url)
            try:
                # Fetch only the leading bytes of the package up to the
                # end of the rpm header as it contains the changelogs
                rpm_header = requests.get(url,
                                          headers={'Range':
                                                   'bytes=0-%s' % (end)})
                rpm_header.raise_for_status()
            except requests.exceptions.RequestException as e:
                # Covers HTTP error statuses as well as connection
                # problems; a broken mirror must not abort the whole run
                log_text(e)
                continue
            # Dump the header to a temporary file as the ts.hdrFromFdno
            # needs a real file to process
            header_file, header_filename = tempfile.mkstemp()
            try:
                # Leaving the with block flushes and closes the file
                with os.fdopen(header_file, 'w+b') as f:
                    f.write(rpm_header.content)
                h = readRpmHeader(ts, '%s' % header_filename)
            finally:
                os.remove(header_filename)
            if h is None:
                continue
            # Parse the changelog, time and contributor's name
            changelog_name = h[rpm.RPMTAG_CHANGELOGNAME]
            changelog_time = h[rpm.RPMTAG_CHANGELOGTIME]
            changelog_text = h[rpm.RPMTAG_CHANGELOGTEXT]
            changelog_entries = []
            for (name, time, text) in zip(changelog_name,
                                          changelog_time,
                                          changelog_text):
                dt = datetime.datetime.fromtimestamp(time).strftime("%a %b " +
                                                                    "%d %Y")
                if args.commits:
                    changelog_entries.append("* %s %s\n" % (dt, name))
                else:
                    changelog_entries.append("* %s %s\n%s\n\n" %
                                             (dt, name, text))
            changelog = ''.join(changelog_entries)
            if args.update:
                # Print only the lines which are not part of the changelog
                # of the locally installed package
                local = str(local_changelog(package[0].text))
                diff = difflib.ndiff(local.split('\n'), changelog.split('\n'))
                for line in diff:
                    if line.startswith('+ '):
                        # Cut off the two character diff marker only; any
                        # "+ " inside the changelog text has to stay
                        print(line[2:])
            else:
                print(changelog)
