#!/usr/bin/perl -w

$| = 1;
use strict;

my $manifest;
my $quiet;

my %dirs;
my @dirs;
my %modes;
my @modes;
my @modes_type;
my @modes_ghost;
my %files;
my %filesc;

$dirs{'/'} = 0;
push @dirs, '/';

$modes{'40755 0 root:root'} = 0;
push @modes, '40755 0 root:root';
push @modes_type, 040000;
push @modes_ghost, 0;

my $pkg = '';
my $fls = 0;
my $prv = 0;
my $con = 0;
my $obs = 0;

my %con;
my %obs;
my %whatprovides;

while (@ARGV) {
  if ($ARGV[0] eq '--manifest') {
    $manifest = 1;
    shift @ARGV;
    next;
  }
  if ($ARGV[0] eq '-q' || $ARGV[0] eq '--quiet') {
    $quiet = 1;
    shift @ARGV;
    next;
  }
  last;
}
die("Usage: findfileconflicts2 packages[.gz]\n") unless @ARGV == 1;

my @ftypes;
$ftypes[001] = 'p';
$ftypes[002] = 'c';
$ftypes[004] = 'd';
$ftypes[006] = 'b';
$ftypes[010] = '-';
$ftypes[012] = 'l';
$ftypes[014] = 's';

sub beautify_mode {
  my @m = split(' ', $modes[$_[0]], 3);
  my $fm = oct($m[0]);
  my $ft = $fm & 0770000;
  $fm &= ~0770000;
  $ft = $ftypes[$ft >> 12 & 077] || '?';

  my $rts = '';
  my $rt = oct($m[1]);
  $rts .= 'd' if $rt  & 02;
  $rts .= 'c' if $rt  & 01;
  $rts .= 'm' if $rt  & 010;
  $rts .= 'n' if $rt  & 020;
  $rts .= 'g' if $rt  & 0100;
  $rts .= 'l' if $rt  & 0200;
  $rts .= 'r' if $rt  & 0400;
  $rt &= ~0733;
  $rts .= sprintf("%o", $rt) if $rt;
  $rts .= ' ' if $rts;
  return "$rts$ft".sprintf("%03o", $fm)." $m[2]";
}

my %rpmqi = map {$_ => 1} qw {
  1000 1001 1002 1003
  1022 1044
  1047
  1054
  1090
  1116 1117 1118
  1030 1036 1037 1039 1040
};

# must have nvra
my @rpmqmust = qw {
  1000 1001 1002 1022
};

#used in manifest mode
sub rpmq {
  my ($rpm) = @_;
  local *RPM;
  my $lead;
  if (!open(RPM, '<', $rpm) || read(RPM, $lead, 96 + 16) != 96 + 16) {
    warn("$rpm: $!\n");
    return undef;
  }
  my ($magic, $sigtype, $headmagic, $cnt, $cntdata) = unpack('N@78n@96N@104NN', $lead);
  if ($magic != 0xedabeedb || $sigtype != 5 || $headmagic != 0x8eade801) {
    warn("Bad rpm $rpm\n");
    return undef;
  }
  my $sigstuff;
  my $sigl = ($cnt * 16 + $cntdata + 7) & ~7;
  my $head;
  if (read(RPM, $sigstuff, $sigl) != $sigl || read(RPM, $head, 16) != 16) {
    warn("Bad rpm $rpm\n");
    return undef;
  }
  ($headmagic, $cnt, $cntdata) = unpack('N@8NN', $head);
  if ($headmagic != 0x8eade801) {
    warn("Bad rpm $rpm\n");
    return undef;
  }
  my $index;
  my $data;
  if (read(RPM, $index, $cnt * 16) != $cnt * 16 || read(RPM, $data, $cntdata) != $cntdata) {
    warn("Bad rpm $rpm\n");
    return undef;
  }
  close RPM;
  my $q = {};
  my ($tag, $type, $offset, $count);
  while($cnt-- > 0) {
    ($tag, $type, $offset, $count, $index) = unpack('N4a*', $index);
    $tag = 0 + $tag;
    next unless $rpmqi{$tag};
    if ($type == 3) {
      $q->{$tag} = [ unpack("\@${offset}n$count", $data) ];
    } elsif ($type == 4) {
      $q->{$tag} = [ unpack("\@${offset}N$count", $data) ];
    } elsif ($type == 6) {
      $q->{$tag} = unpack("\@${offset}Z*", $data);
    } elsif ($type == 8) {
      my $d = unpack("\@${offset}a*", $data);
      my @res = split("\0", $d, $count + 1);
      $q->{$tag} = [ splice @res, 0, $count ];
    }
  }
  return $q;
}

print "scanning file list\n" unless $quiet;
if ($ARGV[0] =~ /\.gz$/) {
  open(FL, "-|", 'gunzip', '-dc', $ARGV[0]) || die("open $ARGV[0]: $!\n");
} elsif ($ARGV[0] eq '-') {
  open(FL, '<&STDIN') || die("open $ARGV[0]: $!\n");
} else {
  open(FL, '<', $ARGV[0]) || die("open $ARGV[0]: $!\n");
}

my $havebadrpms;

if ($manifest) {
  while(<FL>) {
    chomp;
    my $q = rpmq($_);
    if (!$q) {
      $havebadrpms++;
      next;
    }
    next unless $q->{1044};	# ignore src rpms
    if (grep {!defined($q->{$_})} @rpmqmust) {
      warn("$_: missing rpm tags\n");
      $havebadrpms++;
      next;
    }
    my $n = "$q->{1000}";
    my $pkg = "$q->{1000} $q->{1001} $q->{1002} $q->{1022}";
    push @{$whatprovides{$_}}, $pkg for @{$q->{1047} || []};
    push @{$whatprovides{$n}}, $pkg;
    for (@{$q->{1054} || []}) {
      s/^otherproviders\((.*)\)$/$1/;
      push @{$con{$pkg}}, $_;
    }
    push @{$obs{$pkg}}, $n;
    push @{$obs{$pkg}}, $_ for @{$q->{1090} || []};
    if (@{$q->{1117} || []}) {
      my $im = @{$q->{1117}} - 1;
      for my $i (0..$im) {
	my $fm = $q->{1030}->[$i] || 0;
	my $ff = $q->{1037}->[$i] || 0;
	my $lnk = $q->{1036}->[$i];
        my $fu = $q->{1039}->[$i];
        my $fg = $q->{1040}->[$i];
	# defaults
        $fu = 'root' unless defined $fu;
        $fg = 'root' unless defined $fg;
	$lnk = " -> $lnk" if defined $lnk;
	$lnk = '' unless defined $lnk;

	my $fmo = sprintf("%o %o %s:%s", $fm, $ff, $fu, $fg);
	my $bn = $q->{1117}->[$i];
	my $dn = $q->{1118}->[$q->{1116}->[$i]];
	my $n = $dirs{$dn};
	if (!defined($n)) {
	  $n = @dirs;
	  $dirs{$dn} = $n;
	  $dirs[$n] = $dn;
	}
	# ignore link targets of ghosts
	$lnk = '' if $lnk && ($ff & 0100) != 0;
	my $m = $modes{"$fmo$lnk"};
	if (!defined($m)) {
	  $m = @modes;
	  $modes{"$fmo$lnk"} = $m;
	  $modes[$m] = "$fmo$lnk";
	  $modes_type[$m] = $fm & 07770000;
	  $modes_ghost[$m] = $ff & 0100;
	}
	my $f = "$n/$bn";
	if (($ff & 0100) == 0) {
	  if (exists $files{$f}) {
	    $filesc{$f} ||= [ $files{$f} ];
	    push @{$filesc{$f}}, "$pkg/$m";
	  } else {
	    $files{$f} = "$pkg/$m";
	  }
	}
      }
    }
  }
} else {
  while(<FL>) {
    chomp;
    if ($fls) {
      if ($_ eq '-Flx:') {
	$fls = 0;
	next;
      }
      my $lnk = '';
      if (/^(12.*)( -> .*?)$/) {
	$_ = $1;
	$lnk = $2;
      }
      next unless /^(\d+ (\d+) \S+) (.*\/)(.*?)$/;
      my $n = $dirs{$3};
      if (!defined($n)) {
	$n = @dirs;
	$dirs{$3} = $n;
	$dirs[$n] = $3;
      }
      # ignore link targets of ghosts
      $lnk = '' if $lnk && (oct($2) & 0100) != 0;
      my $m = $modes{"$1$lnk"};
      if (!defined($m)) {
	$m = @modes;
	$modes{"$1$lnk"} = $m;
	$modes[$m] = "$1$lnk";
	$modes_type[$m] = oct($1) & 07770000;
	$modes_ghost[$m] = oct($2) & 0100;
      }
      my $f = "$n/$4";
      if (exists $files{$f}) {
	$filesc{$f} ||= [ $files{$f} ];
	push @{$filesc{$f}}, "$pkg/$m";
      } else {
	$files{$f} = "$pkg/$m";
      }
      next;
    }
    if ($prv) {
      if ($_ eq '-Prv:') {
	$prv = 0;
	next;
      }
      s/ .*//;	# no version stuff;
      push @{$whatprovides{$_}}, $pkg;
      next;
    }
    if ($con) {
      if ($_ eq '-Con:') {
	$con = 0;
	next;
      }
      s/ .*//;	# no version stuff;
      s/^otherproviders\((.*)\)$/$1/;
      push @{$con{$pkg}}, $_;
      next;
    }
    if ($obs) {
      if ($_ eq '-Obs:') {
	$obs= 0;
	next;
      }
      s/ .*//;	# no version stuff;
      push @{$obs{$pkg}}, $_;
      next;
    }
    if (/^=Pkg: (.*)/) {
      $pkg = $1;
      my $n = $pkg;
      $n =~ s/ .*//;
      push @{$obs{$pkg}}, $n;
      next;
    }
    if ($_ eq '+Con:') {
      $con = 1 if $pkg;
      next;
    }
    if ($_ eq '+Obs:') {
      $obs = 1 if $pkg;
      next;
    }
    if ($_ eq '+Prv:') {
      $prv = 1 if $pkg;
      next;
    }
    if ($_ eq '+Flx:') {
      $fls = 1;
      next;
    }
  }
}
close(FL) || die("close failed\n");

print "currently have ".@dirs." dirs and ".@modes." modes\n" unless $quiet;

# connect dirs and add all dirs as files
print "connecting ".@dirs." directories\n" unless $quiet;
my @implicit_conflicts;
for (@dirs) {
  next unless /^(.*\/)(.*?)\/$/;
  my $n = $dirs{$1};
  if (!defined $n) {
    $n = @dirs;
    $dirs{$1} = $n;
    $dirs[$n] = $1;
    next;
  }
  my $f = "$n/$2";
  next unless $files{$f};
  my (undef, $m) = split('/', $files{$f}, 2);
  next if $modes_type[$m] == 040000;
  # whoa, have a conflict. search for other dirs
  my $have_dir;
  for my $pkg (@{$filesc{$f} || []}) {
    (undef, $m) = split('/', $pkg, 2);
    $have_dir = 1 if $modes_type[$m] == 040000;
  }
  next if $have_dir;
  push @implicit_conflicts, $f;
}
print "now ".@dirs." directories\n" unless $quiet;

# the old and fast way
#
#for my $f (@implicit_conflicts) {
#  $filesc{$f} ||= [ $files{$f} ];
#  push @{$filesc{$f}}, "implicit_directory 0 0 noarch pkg/0";
#}

if (@implicit_conflicts) {
  print "have implicit conflicts, calculating dir owners\n" unless $quiet;
  my @pdirs;	# parent dirs
  for (@dirs) {
    next unless /^(.*\/)(.*?)\/$/;
    $pdirs[$dirs{$_}] = $dirs{$1};
  }
  my %baddir;
  for (@implicit_conflicts) {
    my ($n, $x) = split('/', $_, 2);
    $baddir{$dirs{"$dirs[$n]$x/"}} = $_;
  }
  my $done;
  while (!$done) {
    $done = 1;
    my $i = -1;
    for (@pdirs) {
      $i++;
      next unless defined $_;
      next unless $baddir{$_} && !$baddir{$i};
      $baddir{$i} ||= $baddir{$_};
      undef $done;
    }
  }
  undef @pdirs;
  # this is not cheap, sorry
  my %baddir_pkgs;
  for my $ff (keys %files) {
    my ($n, undef) = split('/', $ff, 2);
    next unless $baddir{$n};
    for (@{$filesc{$ff} || [ $files{$ff} ]}) {
      my ($pkg, undef) = split('/', $_, 2);
      $baddir_pkgs{$baddir{$n}}->{"$pkg/0"} = 1;
    }
  }
  for my $f (@implicit_conflicts) {
    $filesc{$f} ||= [ $files{$f} ];
    $baddir_pkgs{$f} ||= { "implicit_directory 0 0 noarch pkg/0" => 1 };
    push @{$filesc{$f}}, sort keys %{$baddir_pkgs{$f}};
  }
}

%files = ();	# free mem

# reduce all-dir conflicts and trivial multiarch conflicts
print "reducing trivial conflicts\n" unless $quiet;
for my $f (sort keys %filesc) {
  my $allm;
  my $allc = 1;
  my $pkgn;
  my $pl;
  for my $pkg (@{$filesc{$f}}) {
    my ($p, $m) = split('/', $pkg, 2);
    die unless $p =~ /^([^ ]+) /;
    $allm = $m unless defined $allm;
    $allm = -1 if $allm != $m;
    $pkgn = $1 unless defined $pkgn;
    $allc = 0 if $pkgn ne $1;
    $allc = 0 if $pl && $p eq $pl;
    $pl = $p;
  }
  if ($allc) {
    delete $filesc{$f};
    next;
  }
  if (defined($allm) && $allm >= 0 && $modes_type[$allm] == 040000) {
    delete $filesc{$f};
    next;
  }
}

print "checking conflicts\n" unless $quiet;
my %pkgneeded;
my %tocheck;
my %tocheck_files;
for my $f (sort keys %filesc) {
  my @p = sort(@{$filesc{$f}});	# normalize
  $filesc{$f} = [ @p ];
  s/\/.*// for @p;
  $pkgneeded{$_} = 1 for @p;
  my $pn = join("\n", @p);
  $tocheck{$pn} ||= [ @p ];
  push @{$tocheck_files{$pn}}, $f;
}

my %conflicts;
for my $pkg (sort keys %con) {
  next unless $pkgneeded{$pkg};
  for my $c (@{$con{$pkg}}) {
    for my $p (@{$whatprovides{$c} || []}) {
      next if $p eq $pkg;
      $conflicts{"$pkg\n$p"} = 1;
      $conflicts{"$p\n$pkg"} = 1;
    }
  }
}

for my $pkg (sort keys %obs) {
  next unless $pkgneeded{$pkg};
  for my $c (@{$obs{$pkg}}) {
    for my $p (@{$whatprovides{$c} || []}) {
      next if $p eq $pkg;
      next unless $p =~ /^\Q$c\E /;
      $conflicts{"$pkg\n$p"} = 1;
      $conflicts{"$p\n$pkg"} = 1;
    }
  }
}

# let 32bit packages conflict with the i586 version
for my $pkg (sort keys %pkgneeded) {
  next unless $pkg =~ /^([^ ]+)-32bit /;
  my $n = $1;
  for my $p (@{$whatprovides{$n} || []}) {
    next unless $p =~ /^\Q$n\E .* i[56]86$/;
    next if $p eq $pkg;
    $conflicts{"$pkg\n$p"} = 1;
    $conflicts{"$p\n$pkg"} = 1;
  }
}

print "found ".(keys %tocheck)." conflict candidates\n" unless $quiet;
print "checking...\n" unless $quiet;
# now check each package combination for all candidates
my $haveconflict = 0;
for my $tc (sort keys %tocheck) {
  my @p = @{$tocheck{$tc}};
  while (@p) {
    my $p1 = shift @p;
    for my $p2 (@p) {
      next if $conflicts{"$p1\n$p2"};
      my @con;
      for my $f (@{$tocheck_files{$tc}}) {
	my @pp = grep {s/^(?:\Q$p1\E|\Q$p2\E)\///} map {$_} @{$filesc{$f}};
	next unless @pp;
	# ignore if (all directories or all ghosts or all links) and all same mode;
	my %allm = map {$_ => 1} @pp;
	my $info = '';
	if (keys(%allm) == 1) {
	  my $m = (keys(%allm))[0];
	  # all modes/flags are the same
	  # no conflict if all dirs or all ghosts or all links
	  next if $modes_type[$m] == 040000 || $modes_type[$m] == 0120000 || $modes_ghost[$m] == 0100;
	} else {
	  # don't report mode mismatches for files/symlinks that are not ghosts
	  for my $m (keys %allm) {
	    if (($modes_type[$m] != 0100000 && $modes_type[$m] != 0120000) || $modes_ghost[$m] == 0100) {
	      $info = ' [mode mismatch: '.join(', ', map {beautify_mode($_)} @pp).']';
	      last;
	    }
	  }
	}
	# got one!
	$f =~ /^(\d+)\/(.*)/;
	push @con, "$dirs[$1]$2$info";
      }
      next unless @con;
      my @sp1 = split(' ', $p1);
      my @sp2 = split(' ', $p2);
      print "found conflict of $sp1[0]-$sp1[1]-$sp1[2].$sp1[3] with $sp2[0]-$sp2[1]-$sp2[2].$sp2[3]:\n";
      print "  - $_\n" for @con;
      $haveconflict++;
    }
  }
}

exit($haveconflict || $havebadrpms ? 1 : 0);
