#! /usr/bin/perl -w

# dl10n-stats -- Debian l10n statistics
#
# Copyright (C) 2004 Martin Quinson
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

use strict;
use Getopt::Long; #to parse the args
use Time::Local 'timelocal'; # to compute bug ages

my $progname= $0; $progname= $& if $progname =~ m,[^/]+$,;

my $VERSION = "3.0"; #External Version Number
my $BANNER = "Debian l10n infrastructure -- textual statistics extractor v$VERSION"; # Version Banner - text form
my $DB_FILE="./data/status";
my $STATUS_FILE='./data/status.$lang';
my $STATUS_EN_FILE='./data/status.en';
my $IGNORE_FILE='';

my $list_file=undef;
my $take_debian=0;
my $fmt = "po,podebconf,po4a";

my $mask_done = 0;
my $show_empty = 0;

my $show_status = 0;
my $show_total = 0;
my $assume_bts = 0;
my $diff_only = 0;

use Debian::L10n::Db;

sub syntax_msg {
    my $msg = shift;
    if (defined $msg) {
	print "$progname: $msg\n";
    } else {
	print "$BANNER\n";
    }
    print
"Syntax: $0 [options] [lang]+
General options:
    -h, --help                display short help text
    -V, --version             display version and exit

Package selection:
    --debian                  Only take debian specific packages
    --list=file               Only handle the packages listed in the provided file
    -t,--todo                 Display only when the translation is NOT completed
    -e,--empty                Display even if there is no translation to this language
    --diff-only               Only take debian specific packages based only on the diff presence

Informations to display:
    --total                   Show only summary for the lang, not each package details
    -s,--status               Show status (hard to read when there is more than one format)
    --show=fmt                Show only selected format (instead of $fmt)

    -a,--assume-bts           Assume that the content bugs in the BTS were applied.

Database to use:
    --db=DB_FILE              use DB_FILE as database file
                                (instead of $DB_FILE)
    --sdb=STATUS_FILE         use STATUS_FILE as status file
                                (instead of $STATUS_FILE)
    --edb=STATUS_EN_FILE         use STATUS_EN_FILE as status file
                                (instead of $STATUS_EN_FILE)
    --idb=IGNORE_FILE         use IGNORE_FILE as list of packages to
                                ignore
";
    if (defined $msg) {
	exit 1;
    } else {
	exit 0;
    }
}

# Display Version Banner
# Options: -V|--version, --print-version
sub banner {
  if ($_[0] eq 'print-version') {
    print "$VERSION\n";
  } else {
    print "$BANNER\n";
  }
  exit 0;
}

# Hash used to process commandline options
my %opthash = (# ------------------ general options
    "help|h" => \&syntax_msg,
    "version|V" => \&banner,
    "print-version" => \&banner,

    # ------------------ configuration options
    "todo|t"    => \$mask_done,
    "empty|e"    => \$show_empty,
    "status|s"    => \$show_status,
    "total"  => \$show_total,
    "assume-bts|a" => \$assume_bts,

    "debian" => \$take_debian,
    "diff-only" => \$diff_only,
    "show=s"  => \$fmt,
    "db=s" => \$DB_FILE,
    "sdb=s" => \$STATUS_FILE,
    "edb=s" => \$STATUS_EN_FILE,
    "idb=s" => \$IGNORE_FILE,
    "list=s" => \$list_file,
);

# init commandline parser
Getopt::Long::config('bundling', 'no_getopt_compat', 'no_auto_abbrev');

# process commandline options
GetOptions(%opthash)
    or syntax_msg("error parsing options");

$show_status = 1 if ($assume_bts);


#-----------------------------------------------------------------------------
#                        The main program
#-----------------------------------------------------------------------------
###
### initialisation
###

(@ARGV > 0) or &syntax_msg("Nothing to do !");
my $arg;
my @todo_lang;
while ($arg = shift @ARGV) {
    push @todo_lang,$arg;
}

my $data = Debian::L10n::Db->new();
print "Read the database...";
$data->read($DB_FILE);
print " done.\n";

my %ignored_pkgs = ();
if ($IGNORE_FILE) {
   open IGNORE, "$IGNORE_FILE" or die "Impossible to read the ignore file $IGNORE_FILE\n";
   while (<IGNORE>) {
     chomp;
     next unless $_;
     $ignored_pkgs{$_} = 1;
   }
   close IGNORE;
}

my @todo_pkg;
if (defined($list_file)) {
    print STDERR "Get the package list from $list_file\n";
    open LIST, "$list_file" || die "Impossible to read the list file $list_file\n";
    while (<LIST>) {
	chomp;
	next unless $_;
	s/ //g;
	if ($data->has_package($_)) {
	    push @todo_pkg, $_;
#	    print STDERR "['$_' added]\n";
	} # else {
#	    print STDERR "['$_' is not in the DB, skipped]\n";
#	}
    }
    close LIST;
} else {
    @todo_pkg = sort $data->list_packages();
}
my ($pkg,%p,%d,$man,$status);
my (%total);
my ($man_en_total,$man_fr_total);

my %parts;
map {$parts{$_} = 1} split (/,/, $fmt);
my @poparts=qw(po templates podebconf po4a);

my $format_top = "format STDOUT_TOP = \n".
  '                    '.($parts{'po'}?'______________________':'').($parts{'po4a'}?'_______________________':'').($parts{'templates'}?'______________________':'').($parts{'podebconf'}?'______________________':'').($parts{'man'}?'_______':'')."\n".
  ' __________________|'.($parts{'po'}?'_________po__________|':'').($parts{'po4a'}?'________po4a_________|':'').($parts{'templates'}?'______templates______|':'').($parts{'podebconf'}?'_____po-debconf______|':'').($parts{'man'}?' # man |':'')."\n".
  '|______name________|'.($parts{'po'}?'__%__|____details____|':'').($parts{'po4a'}?'__%__|____details____|':'').($parts{'templates'}?'__%__|____details____|':'').($parts{'podebconf'}?'__%__|____details____|':'').($parts{'man'}?'_______|':'')."\n".
  ".\n";
# print $format_top;
eval $format_top;
die $@ if $@;

my $format = "format STDOUT = \n";
$format .= '|@<<<<<<<<<<<<<<<< |';
foreach my $part (@poparts) {
    $format .= '@||| |@||||||||||||| |' if ($parts{$part});
}
$format .= '@||||| |' if $parts{'man'};
$format .= "@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n".'$pkg,               ';
foreach my $part (@poparts) {
    $format .= '$p{\''.$part.'\'},$d{\''.$part.'\'}, ' if ($parts{$part});
}
$format .= ' $man, ' if $parts{'man'};
$format .= ' $status'." ;\n.\n";
# print $format;
eval $format;
die $@ if $@;

map {
$-=0;
    my $lang=$_;
    my $statusDBname = "$STATUS_FILE";
#    print STDERR "Handle $lang\n" if (scalar @todo_lang > 1);
    $statusDBname =~ s/\$lang/$lang/g;
    my $statusDB = Debian::L10n::Db->new();
    $statusDB->read($statusDBname,0) if $show_status;

    my $statusEnDBname = "$STATUS_EN_FILE";
#    print STDERR "Handle $lang\n" if (scalar @todo_lang > 1);
    my $statusEnDB = Debian::L10n::Db->new();
    $statusEnDB->read($statusEnDBname,0) if $show_status;

    print "Status of the ".($take_debian?"debian ":"")."packages ".($mask_done?"to do ":"")."in $lang\n\n" unless $show_total;

    my %total;
    foreach (@todo_pkg) {
	$pkg = $_;
#	print STDERR "consider $pkg\n";
	# Take only packages having material and not ignored
	next if defined $ignored_pkgs{$pkg};
	next unless $data->has_po($pkg) || $data->has_templates($pkg) || $data->has_podebconf($pkg) || $data->has_man($pkg) || $data->has_po4a($pkg) || $show_empty;

	# Take only debian packages
	if ($take_debian) {
	    next if ($data->has_upstream($pkg) && $data->upstream($pkg) ne "debian");
	    next unless ($data->has_version($pkg));
	    next if ($data->version($pkg) =~ m/-\d/ and not $diff_only);
	}

	# Take only packages having material in this language (unless --empty)
	my $found=0;
	$man=' ';
	my (%score,%ori,%extra);
	$status='';
	foreach my $part (@poparts) {
	    my $has_part="has_$part";
	    $p{$part}='';
	    $d{$part}='';
	    if ($parts{$part} && $data->$has_part($pkg)) {
		my $bts_reported=0;
		$status .= "$part(";
		$score{$part} = '---';

		if ($show_status && $statusDB->has_package($pkg) && $statusDB->has_status($pkg)) {
		    my $tmpstatus;
		    foreach my $statusline (@{$statusDB->status($pkg)}) {
			my ($kind,$file,$date,$status_from_db,$translator,$url,$bug_nb) = @{$statusline};
			# FIXME sort on file? (e.g. dpkg has different
			# files)
			if ($kind eq $part) {
			    my $days = "??";
			    if ($date =~ m/^(\d{4})-(\d\d)-(\d\d) (\d*):(\d*):(\d*)/) { # 2003-07-26
				$days = sprintf "%.0f",
				  (time - timelocal ($6,$5,$4,$3,$2-1,$1)) / (60 * 60 * 24);
			    }
			    $tmpstatus = "$status_from_db, $days days ";
			    if ($status_from_db =~ m/^(bts|done|hold|fix|wontfix)$/i) {
			      $bts_reported = 1;
			    } else {
			      $bts_reported = 0;
			    }
			}
		    }
		    # Only keep the last status.
		    $status .= $tmpstatus if defined $tmpstatus;
		} elsif ($show_status && $statusEnDB->has_package($pkg) && $statusEnDB->has_status($pkg)) {
		    foreach my $statusline (@{$statusEnDB->status($pkg)}) {
			my ($kind,$file,$date,$status_from_db,$translator,$url,$foo,$bug_nb) = @{$statusline};
			if (($part eq "podebconf") and ($kind eq "templates")) {
			    my $days = "??";
			    if ($date =~ m/^(\d{4})-(\d\d)-(\d\d) (\d*):(\d*):(\d*)/) {
			        $days = sprintf "%.0f", (time - timelocal ($6,$5,$4,$3,$2-1,$1)) / (60 * 60 * 24);
			    }
			    $status .= "rev, $days days";
			}
		    }
		}
		$status .= ') ';
		$status  =~ s/ \)/)/;
		$status  =~ s/$part\(\)//;

		foreach my $line (@{$data->$part($pkg)}){
		    my ($pofile, $langfound, $stat) = @{$line};
		    if ($langfound eq $lang) {
			$score{$part} = add_stat($stat,$score{$part});

			if ($mask_done) {
			    unless (   ($assume_bts && $bts_reported)
			            or (output_percent($stat) eq '100%')) {
				$found = 1;
			    }
			} else {
			    $found = 1;
			}
		    } elsif ($langfound eq '_') {
			$ori{$part} = add_stat($stat, $ori{$part});
		    } elsif ($langfound ne '') {
			$extra{$part} = $stat;
		    }
		}

		if ($score{$part} eq '---' && defined($ori{$part})) {
		    $score{$part} = normalize_score($ori{$part},"0t0f0u");
		    $found = 1 if $show_empty && !($assume_bts && $bts_reported);
		} elsif (defined($ori{$part})) {
		    $score{$part} = normalize_score($ori{$part},$score{$part});
		    $found = 1 if ((not $score{$part} =~ /0f0u/) && !($assume_bts && $bts_reported));
		} elsif(not defined($ori{$part}) and $score{$part} eq '---' and defined $extra{$part}) {
		    $score{$part} = "---";
		    $found = 1 if $show_empty && !($assume_bts && $bts_reported);
		}
#		print STDERR "show_empty=$show_empty; assume_bts=$assume_bts; bts_reported=$bts_reported; found=$found\n";
		if ($score{$part} =~ /([0-9]*)t([0-9]*)f-([0-9]*)u/) {
		    $score{$part} = '---';
		    $d{$part}=output_details($score{$part});
		    next;
		}

		$p{$part}=output_percent($score{$part});
		$d{$part}=output_details($score{$part});


		if (defined $score{$part} && $score{$part} ne '---') {
		    if ($assume_bts) {
			my $stat_to_add=$score{$part};
			if ($bts_reported) {
			    $stat_to_add =~ /([0-9]*)t([0-9]*)f([0-9]*)u/;
			    $stat_to_add = ($1+$2+$3)."t0f0u";
			}
			$total{$part} = add_stat($stat_to_add,$total{$part});
		    } else {
			$total{$part} = add_stat($score{$part},$total{$part});
		    }
		}
	    }
	}

	# Search for mans
	if ($parts{'man'} && $data->has_man($pkg)) {
	    my $en=0;
	    my $fr=0;
	    foreach my $line (@{$data->man($pkg)}){
		my ($name, $langfound) = @{$line};
		$en++ if ($langfound eq 'english');
		$fr++ if ($langfound eq 'french');
	    }
	    $man="$fr/$en";
	    $man_fr_total += $fr;
	    $man_en_total += $en;
	    $found = 1 unless $mask_done && $fr == $en ;
	}

	write if ($found && !$show_total);
    }

    if ($show_total) {
	print "$lang: ";
	foreach my $part (@poparts) {
	    print "$part("
	      .output_percent($total{$part}).";"
	      .output_details($total{$part}).")  "
	      if ($parts{$part});
	}
	print "\n";

    } else {
	print "|__________________|";
	foreach my $part (@poparts) {
	    print "_____|_______________|" if $parts{$part};
	}
	print "_______|" if $parts{'man'};
	print "\n";

	$pkg = "TOTAL ($lang)";
	foreach my $part (@poparts) {
	    $p{$part}=output_percent($total{$part});
	    $d{$part}=output_details($total{$part});
	}
	$man="$man_fr_total/$man_en_total" if $parts{'man'};
	if ($assume_bts) {
	    $status = " Assuming that all bugs reported were applied";
	} else {
	    $status = "";
	}
	write;

	print "|__________________|";
	foreach my $part (@poparts) {
	    print "_____|_______________|" if $parts{$part};
	}
	print "_______|" if $parts{'man'};
	print "\n\n\n";

    }
} @todo_lang;

if (not $show_total) {
    print "When there is some ---, that means that the material exists, but is not \n".
	  "translated to this language and that some issue (in pot file or DB) prevent to find the amount of string.\n\n";
    print "Significance of the 'details' columns:\n".
	  "   [# translated strings]/[# fuzzy translation]/[# untranslated strings]\n\n";
    if ($parts{'man'}) {
	print "Significance of the 'man' column: [# french pages]/[# english pages]\n";
	print "WARNING: 'french' is hardcoded in that script for now.\n";
	print "WARNING: do not trust the stats about man for now.\n";
    }
}


sub add_stat {
    my $new=shift;
    my $old=shift;

    return $new unless ($old);
    return $new if ($old eq '---');
    $new =~ /([0-9]*)t([0-9]*)f([0-9]*)u/;
    my ($nt,$nf,$nu) = ($1||0, $2||0, $3||0);
    $old =~ /([0-9]*)t([0-9]*)f([0-9]*)u/;
    my ($ot,$of,$ou) = ($1||0, $2||0, $3||0);
    my $res= ($nt+$ot)."t".($nf+$of)."f".($nu+$ou)."u";
    return $res;
}

sub normalize_score {
    my $orig=shift;
    my $trans=shift;

    $orig =~ /([0-9]*)t([0-9]*)f([0-9]*)u/;
    my ($ot,$of,$ou) = ($1||0, $2||0, $3||0);
    $trans =~ /([0-9]*)t([0-9]*)f([0-9]*)u/;
    my ($tt,$tf,$tu) = ($1||0, $2||0, $3||0);
    my $res= ($tt)."t".($tf)."f".($ot+$of+$ou-$tf-$tt)."u";
    return $res;
}

sub output_percent {
    my $stats=shift||"";
    my $t = "0";
    my $u = "0";
    my $f = "0";
    my $percent;

    if ($stats =~ /([0-9]*)t/) {  $t=$1;  }
    if ($stats =~ /([0-9]*)u/) {  $u=$1;  }
    if ($stats =~ /([0-9]*)f/) {  $f=$1;  }
    $percent = calc_percent($t,$t+$u+$f);
    if ($percent eq "NaN" || $percent == 0) {
	return '';
    }
    return "$percent\%";
}
sub output_details {
    my $stats = shift||"";
    my $t = "0";
    my $u = "0";
    my $f = "0";
    my $percent;

    if ($stats =~ /([0-9]*)t/) {  $t=$1;  }
    if ($stats =~ /([0-9]*)u/) {  $u=$1;  }
    if ($stats =~ /([0-9]*)f/) {  $f=$1;  }
    return ($t+$f+$u == 0 ? $stats : "$t/$f/$u");
}

sub calc_percent{
    my $up=shift;
    my $down=shift;
    my $res;

    if ($down==0) {
	return "NaN";
    }
    $res = $up/$down*100;
    $res =~ s/^([0-9]*)\..*/$1/;
    return $res;
}

