#! /bin/bash
#
# Johannes Meixner <jsmeix@suse.de>, 2010

#set -x

# Run with a well-defined environment:
# a fixed search path for commands and the POSIX locale
# so that the results do not depend on the caller's settings.
PATH="/sbin:/usr/sbin:/usr/bin:/bin"
LC_ALL="POSIX"
LANG="POSIX"
export PATH LC_ALL LANG
# New files are created without write permission for group and others:
umask 022
# Switch off bash file name globbing so that characters
# like '*' and '?' in values have no side-effects (see bnc #575056):
set -o noglob

# Names of the files this script works with (relative to the current directory):
# the XML input, the intermediate digest made from it,
# the list of drivers to be considered, and the two result files.
INPUT="printers.xml"
DIGEST="printers.digest"
DRIVERS_TO_BE_CONSIDERED="drivers_to_be_considered"
OUTPUT="drivers_for_printers_to_be_considered"
EXAMPLES="examples_for_drivers_to_be_considered"
# Start with empty result files (they are created or truncated)
# because results get appended to them later:
: >"$OUTPUT"
: >"$EXAMPLES"

# Function to output one entry:
# For each driver in the space-separated list POSSIBLE_DRIVERS
# look up its type in the file DRIVERS_TO_BE_CONSIDERED
# and append a line "type driver id" to the file OUTPUT.
# Globals read: POSSIBLE_DRIVERS ID DRIVERS_TO_BE_CONSIDERED OUTPUT
# A line in DRIVERS_TO_BE_CONSIDERED belongs to a driver
# when the line ends with a space followed by the driver name.
# The type is everything in that line before its first space.
# The driver name is compared literally (not as a regular expression)
# so that a driver name like 'foo.bar' does not also match
# another entry like 'fooXbar'.
# A driver without a type is reported on stderr and skipped.
Output()
{ local D LINE TYPE NL
  NL=$'\n'
  for D in $POSSIBLE_DRIVERS
  do TYPE=""
     # The '|| [ -n "$LINE" ]' also processes a last line without newline:
     while IFS= read -r LINE || [ -n "$LINE" ]
     do case "$LINE" in
             # The quoted pattern part is matched literally.
             # If several lines match, all their types are kept
             # (one per line) in the order as they appear in the file:
             *" $D") TYPE="${TYPE:+$TYPE$NL}${LINE%% *}" ;;
        esac
     done <"$DRIVERS_TO_BE_CONSIDERED"
     if [ -z "$TYPE" ]
     then echo "Skipping $D $ID (no type for $D in $DRIVERS_TO_BE_CONSIDERED)" 1>&2
     else echo "$TYPE $D $ID" >>"$OUTPUT"
     fi
  done
}

# Function to test and output one entry:
# Keep from the space-separated list DRIVERS only those drivers
# that are listed in the file DRIVERS_TO_BE_CONSIDERED,
# store them in POSSIBLE_DRIVERS, and call Output for them.
# Globals read: ID DRIVERS DRIVERS_TO_BE_CONSIDERED
# Globals written: POSSIBLE_DRIVERS
# A driver is listed when a line in DRIVERS_TO_BE_CONSIDERED
# ends with a space followed by the driver name.
# The driver name is compared literally (not as a regular expression)
# so that a driver name like 'foo.bar' is not accepted
# only because there is another entry like 'fooXbar'.
# Returns 1 (with a message on stderr) when there is no driver at all
# or when none of the drivers is listed,
# otherwise the return code of Output.
TestAndOutput()
{ local D CONSIDERED NL
  NL=$'\n'
  if [ -z "$DRIVERS" ]
  then echo "Skipping $ID (no driver at all)" 1>&2
       return 1
  fi
  # Read the file only once per entry.
  # The added newlines at the beginning and at the end make sure
  # that each line in CONSIDERED is followed by a newline character
  # so that "ends with ' driver'" can be tested as "contains ' driver<newline>'":
  CONSIDERED="$NL$( cat "$DRIVERS_TO_BE_CONSIDERED" )$NL"
  POSSIBLE_DRIVERS=""
  for D in $DRIVERS
  do case "$CONSIDERED" in
          # The quoted pattern part is matched literally:
          *" $D$NL"*) POSSIBLE_DRIVERS="$POSSIBLE_DRIVERS $D" ;;
          *) echo "Skipping $D (not in $DRIVERS_TO_BE_CONSIDERED) for $ID" 1>&2 ;;
     esac
  done
  if [ -z "$POSSIBLE_DRIVERS" ]
  then echo "Skipping $ID (no driver in $DRIVERS_TO_BE_CONSIDERED)" 1>&2
       return 1
  fi
  Output
}

# Process the input:
# The useless use of cat is intentional to make it more obvious
# what the input, the processing, and the output is.
# The leading spaces in the 'grep -E' search expression
# avoid also matching the same XML tags on nested lower levels
# (same XML tags like <url> are used for same kind of content
# but with different meaning/semantics on different levels).
# 'grep -E' is used instead of 'egrep' because 'egrep' is obsolescent
# (newer GNU grep versions print a warning when 'egrep' is called).
# The 'sed' expressions do the following in this order:
# - remove the closing </id> </functionality> </driver> tags
# - rename a <driver> tag that is indented by exactly four spaces
#   to <recommended> (a <driver> tag indented by six spaces is kept)
# - replace each run of whitespace by one single space
# - remove leading and trailing whitespace
# - remove the first ' />'
# - remove the leading '<' and replace the first '>' by a space
#   so that each line gets the form "key value"
cat "$INPUT" \
 | grep -E '^    <id>|^    <functionality>|^    <driver>|^      <driver>' \
 | sed -e 's/<\/id>//' \
       -e 's/<\/functionality>//' \
       -e 's/<\/driver>//' \
       -e 's/^    <driver>/    <recommended>/' \
       -e 's/[[:space:]][[:space:]]*/ /g' \
       -e 's/^[[:space:]]*//' \
       -e 's/[[:space:]]*$//' \
       -e 's/ \/>//' \
       -e 's/^<//' \
       -e 's/>/ /' \
 | cat >"$DIGEST"
# The entries in the printers digest have a form like
# ----------------------------------------------------
# id HP-LaserJet_1220
# functionality A
# recommended pxlmono
# driver Postscript
# driver gimp-print
# driver gutenprint
# driver hplip
# driver lj4dith
# driver lj5gray
# driver ljet4
# driver pxlmono
# ----------------------------------------------------
# or with an unexpected driver value like
# ----------------------------------------------------
# id Brother-HL-2035
# functionality B
# recommended hl1250
# driver Brother's HL-2030 driver for CUPS
# driver gimp-print
# driver gutenprint
# driver hl1250
# driver hpijs-pcl5e
# driver lj4dith
# driver lj5gray
# driver ljet4
# ----------------------------------------------------
# or with unexpected bad functionality value like
# ----------------------------------------------------
# id HP-LaserJet_M1522nf_MFP
# functionality D
# recommended pxlmono
# driver Postscript
# driver pxlmono
# ----------------------------------------------------
# or with no functionality (value F, no driver entries) like
# ----------------------------------------------------
# id HP-PhotoSmart
# functionality F
# ----------------------------------------------------
# or with missing functionality value like
# ----------------------------------------------------
# id Oki-C5750
# functionality 
# recommended Postscript
# driver Postscript
# ----------------------------------------------------
# where functionality is one of
# ----------------------------------------------------
# functionality 
# functionality A
# functionality B
# functionality D
# functionality F
# ----------------------------------------------------
# Note that there is no "functionality C".
# Because the functionality value is not reliable it is ignored.
# Only the recommended and driver values are used because in the end
# it only matters whether or not a driver is available.
# Read the digest from stdin line by line.
# Each line is split into KEY (first word), VALUE (second word),
# and REST (everything else, which is not used), so that from
# a driver value that consists of several words only the first word is used.
# An 'id' line starts a new entry: the previous entry (if any) is passed
# to TestAndOutput via the global variables ID and DRIVERS.
# DRIVERS is a space-separated list without duplicates where
# the recommended driver (if any) comes first.
# 'read -r' is used so that backslashes in the digest are kept as they are.
exec <"$DIGEST"
ID=""
DRIVERS=""
while read -r KEY VALUE REST
do case "$KEY" in
        id) [ -n "$ID" ] && TestAndOutput
            ID="$VALUE"
            DRIVERS="" ;;
        # The functionality value is intentionally ignored:
        functionality) ;;
        recommended) DRIVERS="$VALUE" ;;
        driver) if [ -z "$DRIVERS" ]
                then DRIVERS="$VALUE"
                else # Do not add a driver that is already in the list
                     # (e.g. the recommended driver is usually listed again):
                     for D in $DRIVERS
                     do [ "$VALUE" = "$D" ] && VALUE=""
                     done
                     [ -n "$VALUE" ] && DRIVERS="$DRIVERS $VALUE"
                fi ;;
        *) echo "Ignoring key $KEY" 1>&2 ;;
   esac
done
# Inside the loop an entry is only output when the next 'id' line is read.
# There is no 'id' line after the last entry
# so that the last entry must be output here:
[ -n "$ID" ] && TestAndOutput

# Sort the list of drivers for printers to be considered:
# The unsorted list is moved to a file with the suffix '.unsorted'
# which is the input for 'sort' that writes the sorted list
# under the original file name.
# The sort options ignore leading blanks (-b), use dictionary order (-d),
# and ignore case (-f).
# The unsorted file is removed only if 'sort' was successful.
mv -f "$OUTPUT" "$OUTPUT.unsorted"
if sort -b -d -f -o "$OUTPUT" "$OUTPUT.unsorted"
then rm "$OUTPUT.unsorted"
fi

# Extract one example for each driver:
# Read the lines of the form "type driver printer" from the output file
# and append a line to the examples file whenever its driver differs
# from the driver in the directly preceding line.
# Because only the directly preceding line is compared this relies on
# lines for the same driver being next to each other in the output file.
exec <"$OUTPUT"
PREVIOUS_DRIVER=""
while read TYPE DRIVER PRINTER
do # Same driver as in the line before: an example was already written.
   [ "$DRIVER" = "$PREVIOUS_DRIVER" ] && continue
   PREVIOUS_DRIVER="$DRIVER"
   echo "$TYPE $DRIVER $PRINTER" >>"$EXAMPLES"
done

