#!C:\Perl\bin\perl.exe
# 
# The shebang path C:\Perl\bin\perl.exe is where Perl is installed on my office Windows box.
# 

use strict;
# use warnings; # turn this on for debugonly

use Cwd;
use Encode; # I use it to encode and decode strings in/from different encodings
use File::Find;
use File::Copy; # I use it
use Time::HiRes qw( gettimeofday tv_interval ); # I use it

$| = 1; # autoflush STDOUT so progress lines appear immediately

# Directory the script was started from; the output sub-directories are
# later created relative to it.
my $cwd = getcwd();
my $i   = 0; # scratch counter shared by the top-level loops below

# File types of interest (matched against the file name suffix).
# "zip" and "rar" used to be listed here too but are currently disabled.
my @ftypes_G = qw(fb2);

# Directories of interest. Starts empty and is filled from the command line;
# earlier revisions hard-coded paths such as "E:\\k\\Aldebaran" here.
my @dirlist_G = ();

# 7-Zip command (pre-quoted because the path contains a space) and the name
# of a scratch file for its output. Neither is referenced by the code visible
# in this file.
my $unzip_G               = '"C:\Program Files\7-Zip\7z.exe"';
my $unzip_tmp_out_fname_G = 'tmp-unzip-out.txt';

# Reference time stamps: script start, a spare one, and the start of the
# current progress-reporting interval.
my ($t0_G, $t1_G, $t_running_G) = ([gettimeofday], [gettimeofday], [gettimeofday]);

# Start-up banner showing the working directory.
my $banner_rule_G = "\n-------------------------------------------------------\n";
print $banner_rule_G, "cwd=$cwd", $banner_rule_G;


#debugonly &subzil_print_hash_as_txt(\%hashOfFiles); # pass reference to hash as argument

### 
### Read in directories given as parameters, if no parameters given - exit
### 
# Every command-line argument is taken as a directory to process.
# Without arguments there is nothing to do: print the usage text and leave
# with a non-zero status so that callers can detect the failure.
if( @ARGV > 0 )
{
    @dirlist_G = (); # reset the list
    for my $argno (0 .. $#ARGV)
    {
        push(@dirlist_G,$ARGV[$argno]);
        printf("DEBUG: arg[%2d]=\"%s\"\n",$argno,$ARGV[$argno]);
    }; # forr
}else
{
    print "ERROR:123: You must specify in command line at least one directory to process.\n";
    print "USAGE: This script takes fb2-files from the specified directory(s) (recursively) and makes copy of them in the current directory according to <title-info> <lang> tag. \"ru\" goes into (created) subdir \"lang-ru\", others go into (created) subdir \"lang-other\".\n";
    print "NOTE: everything in the current directory will be silently overwritten. So beware.\n";
    exit 1; # usage error, so do not report success
}; # iffelse

# Echo the effective configuration.
for my $dirno (0 .. $#dirlist_G)
{
    printf("DEBUG:[%2d] dir=\"%s\"\n",$dirno,$dirlist_G[$dirno]);
}; # forr
for my $typeno (0 .. $#ftypes_G)
{
    printf("DEBUG:[%2d] type=\"%s\"\n",$typeno,$ftypes_G[$typeno]);
}; # forr
print "\n-------------------------------------------------------\n";
print "\n-------------------------------------------------------\n";

######### Main loop
# ---- State shared by the main loop ------------------------------------------
my $ii = 0;

# Scratch strings (current input file name and friends).
my ($fullfname_G, $fname_G, $outfname_G, $outfname_marked_G, $origin_mark_G, $buf_G)
    = ("") x 6;

my %HAUT_G = (); # by author
my %HTIT_G = (); # by title

# File lists, rebuilt for every input directory: everything found, the files
# classified as "ru", the files classified as "other", and a list for bad files.
my @FL_G     = ();
my @FL_RU__G = ();
my @FL_OTH_G = ();
my @FL_BAD_G = ();

# Progress bookkeeping and running totals.
my ($listsize_G, $curnum_G, $prev_curnum_G, $numtot_G, $numall_G) = (0) x 5;

# Further counters; most are not referenced by the code visible in this file.
my ($numeng_G, $numwin_G, $numiso_G, $numkoi_G, $numutf_G, $numbad_G) = (0) x 6;

# Number of files sorted into each output directory.
my ($numru__G, $numoth_G) = (0, 0);

# Output directories, relative to the current working directory.
my $dirname_ru__G = "lang-ru";
my $dirname_oth_G = "lang-other";

### Action! Create both output directories; the result of mkdir is ignored.
mkdir($_) for ($dirname_ru__G, $dirname_oth_G);

# Main processing: for every directory given on the command line build the
# list of matching files, read each file's FB2 description, and copy the file
# into "lang-ru" (when <title-info><lang> is "ru", case-insensitively) or into
# "lang-other" (everything else). Files with the same base name overwrite each
# other in the output directories.
my $numdir_G = 0;
foreach my $dir (@dirlist_G)
{
    $numdir_G++;
    printf("Processing directory: %s (%d of %d)\n",$dir,$numdir_G,scalar(@dirlist_G));
    my $t_dir_start = [gettimeofday];
    my @ll_single_dir_list = ($dir);
    @FL_G = koplib_generate_filelist(\@ll_single_dir_list,\@ftypes_G); # note: you must pass list as reference: \@list, not @list
    my $listsize = scalar(@FL_G);
    printf(" FL_G contains %d files.\n",$listsize);
    my $elapsed_list = tv_interval($t_dir_start,[gettimeofday]);
    # An empty directory must not abort the run with a division by zero.
    printf("Completed: building filelist for dir=\"%s\": elapsed=%f seconds (%f seconds per file)\n",
           $dir,$elapsed_list,($listsize > 0 ? $elapsed_list/$listsize : 0));
    print "-------------------------------------------------------\n";

    # Per-directory state: lists, progress counter and progress timer.
    @FL_RU__G      = (); # clean up
    @FL_OTH_G      = (); # clean up
    @FL_BAD_G      = (); # clean up
    $curnum_G      = 0;  # progress is reported per directory
    $prev_curnum_G = 0;
    $t_running_G   = [gettimeofday];

    foreach my $src (@FL_G)
    {
        $fullfname_G = $src;
        $curnum_G++;
        $numall_G++;
        $numtot_G++;

        my $rawdes = koplib_fb2_load_raw_description_from_file($src);
        my $enc    = koplib_fb2_get_encoding($rawdes);
        my $tii    = koplib_fb2_get_tag_content($rawdes,"title-info",$enc);
        my $lan    = koplib_fb2_get_tag_content($tii,"lang",$enc);
        my $is_ru  = (uc($lan) eq "RU") ? 1 : 0;

        # File name only; File::Find reports paths with "/" separators.
        my $basename = substr($src,rindex($src,"/")+1);
        my $newf     = ($is_ru ? $dirname_ru__G : $dirname_oth_G) . "/" . $basename;

        ### Action!
        if( copy($src,$newf) )
        {
            if( $is_ru )
            {
                $numru__G++;
                push(@FL_RU__G,$src);
            }else{
                $numoth_G++;
                push(@FL_OTH_G,$src);
            }; # iffelse
        }else{
            # Report the failure instead of counting the file as copied.
            printf("ERROR: cannot copy \"%s\" to \"%s\": %s\n",$src,$newf,$!);
            $numbad_G++;
            push(@FL_BAD_G,$src);
        }; # iffelse

        if( !$is_ru && length($lan)<2 )
        {
            # No usable language tag: dump what was found to help diagnosis.
            printf("DEBUG:lan=\"%s\" enc=%s fil=%s tii=%s\n",$lan,$enc,$newf,$tii);
        }; # iff

        ###
        ### Progress indicator: one line every 250 files
        ###
        if( ($curnum_G % 250)==0 && ($curnum_G != $prev_curnum_G) )
        {
            my $now        = [gettimeofday];
            my $percent    = ($curnum_G * 100.0) / $listsize; # $listsize > 0 inside this loop
            my $elapsed    = tv_interval($t_running_G,$now);
            $t_running_G   = $now;
            my $perfile    = $elapsed / ($curnum_G - $prev_curnum_G);
            $prev_curnum_G = $curnum_G;
            my $todo       = ($listsize - $curnum_G);
            my $eta        = $perfile * $todo;
            my $eta_min    = $eta / 60;
            my $eta_sec    = $eta % 60;
            printf("[%6d of %6d]",$curnum_G,$listsize);
            printf(" (%4.1f%%)(ETA=%3d min %2d sec)(%f per file)",$percent,$eta_min,$eta_sec,$perfile);
            printf("\n");
        }; # iff
    }; # foreach FL_G

    ###
    ### Per-directory summary
    ###
    print "\n-------------------------------------------------------\n";
    printf("\tTotal : %6d files for directory %s\n",$listsize,$dir);
    printf("\tru    : %6d files.\n",scalar(@FL_RU__G));
    printf("\tother : %6d files.\n",scalar(@FL_OTH_G));
    printf("\tfailed: %6d files.\n",scalar(@FL_BAD_G)) if @FL_BAD_G;
    printf("-------------------------------------------------------\n");

    # Time spent on this directory only (list building plus copying).
    my $elapsed_dir = tv_interval($t_dir_start,[gettimeofday]);
    printf("Completed processing for dir=\"%s\": %d files, elapsed %.1f seconds (%f seconds per file)\n",
           $dir,$listsize,$elapsed_dir,($listsize > 0 ? $elapsed_dir/$listsize : 0));

}; # foreach dir

print "\n-------------------------------------------------------\n";
printf("\t\t\tGrand Total  : %6d files.\n",$numall_G);
printf("\t\t\tru           : %6d files.\n",$numru__G);
printf("\t\t\tother        : %6d files.\n",$numoth_G);
printf("\t\t\tfailed       : %6d files.\n",$numbad_G) if $numbad_G > 0;
printf("-------------------------------------------------------\n");

printf("-------------------------------------------------------\n");
my $elapsed = tv_interval($t0_G,[gettimeofday]);
printf("Completed all processing: %d files, elapsed %.1f seconds (%f seconds per file)\n",
       $numtot_G,$elapsed,($numtot_G > 0 ? $elapsed/$numtot_G : 0));
printf("-------------------------------------------------------\n");

exit (0);


###############################################################################
#
# Parameters: list of directory path as parameter (could be single dir, but still a list)
#             list of filetypes of interest (optional, default is all files)
# Returns   : resulting full pathnames as list
# Uses      : File::Find File::Find::name
# Globals   : none 
# Errors    : 
#
sub koplib_generate_filelist # @METAGS koplib_generate_filelist
{
    # Recursively collect the plain files below the given directories.
    #
    # Parameters: $_[0] - reference to a list of directory paths
    #             $_[1] - (optional) reference to a list of file name suffixes
    #                     (text after the last '.'), compared case-insensitively;
    #                     when omitted every plain file is returned
    # Returns   : list of path names as reported by File::Find ($File::Find::name)
    # Notes     : entries of the directory list that are not directories are
    #             skipped silently; a DEBUG line is printed per directory given.
    my ($lp_dirs, $lp_ftypes) = @_;
    my @l_dir_given = @{$lp_dirs};
    my @l_flist     = ();

    for my $idx (0 .. $#l_dir_given)
    {
        printf("DEBUG:koplib_generate_filelist:[%2d] l_dir_given=\"%s\"\n",$idx,$l_dir_given[$idx]);
    }; # forr

    # Upper-cased lookup table of the wanted suffixes (if any were given).
    my $l_have_types_fok = defined($lp_ftypes) ? 1 : 0;
    my %l_wanted_suffix  = ();
    if( $l_have_types_fok )
    {
        %l_wanted_suffix = map { uc($_) => 1 } @{$lp_ftypes};
    }; # iff

    foreach my $ll_dir (@l_dir_given)
    {
        next if ! -d $ll_dir; # not a directory: nothing to scan

        # no_chdir keeps the working directory unchanged during the scan, so
        # that $File::Find::name stays a valid path for the -f test even when
        # $ll_dir is a relative path.
        find({ wanted => sub {
                   my $l_name = $File::Find::name;
                   return if ! -f $l_name; # plain files only
                   if( $l_have_types_fok )
                   {
                       my $suffix = substr($l_name,rindex($l_name,'.')+1); # get filetype suffix
                       return if !$l_wanted_suffix{uc($suffix)};
                   }; # iff
                   push(@l_flist,$l_name);
                   return;
               },
               follow   => 0,
               no_chdir => 1,
             }, $ll_dir);
    }; # foreach

    return @l_flist;
} # koplib_generate_filelist

###############################################################################
#
# Parameters: $lp_inp_fb2_fname
# Returns   : $lr_description, "" if error
# Uses      : 
# Globals   : none 
# Errors    : exit if input file does not exist 
#
sub koplib_fb2_load_raw_description_from_file # @METAGS koplib_fb2_load_raw_description_from_file
{
    # Return the beginning of an FB2 file up to and including the end tag of
    # its description ("</description>" or "</DESCRIPTION>").
    #
    # Parameters: $_[0] - name of the input file
    # Returns   : the raw (undecoded) text from the start of the file through
    #             the end tag; "" when no end tag is found
    # Errors    : prints an error and exits with status 253 when the file
    #             cannot be opened
    # Notes     : the file is read line by line and reading stops at the first
    #             line containing an end tag, so the book body is not loaded.
    my $lp_inp_fb2_fname = $_[0]; #reads in the parameter
    my $l_end_tag_len    = length("</description>");

    # Three-argument open: the file name is taken literally, whatever
    # characters or whitespace it contains.
    my $l_fh;
    if( !open($l_fh,'<',$lp_inp_fb2_fname) )
    {
        printf("ERROR:378:koplib_fb2_load_raw_description_from_file: Cannot open file fname=\"%s\": %s\n",$lp_inp_fb2_fname,$!);
        exit (253);
    }; # iff

    my $lr_description = "";
    my $l_found_fok    = 0;
    while( defined(my $l_line = <$l_fh>) )
    {
        my $li = index($l_line,"</description>");
        if( $li < 0 )
        {
            $li = index($l_line,"</DESCRIPTION>");
        }; # iff
        if( $li >= 0 )
        {
            # Keep everything up to and including the end tag, drop the rest.
            $lr_description .= substr($l_line,0,$li + $l_end_tag_len);
            $l_found_fok = 1;
            last; # while
        }; # iff
        $lr_description .= $l_line;
    }; # while
    close($l_fh);

    return $l_found_fok ? $lr_description : ""; # "" when no description end tag was found
} # koplib_fb2_load_raw_description_from_file
 
###############################################################################
#
# Parameters:  $lp_description
# Returns   :  $lr_encoding
# Uses      : 
# Globals   : none 
# Errors    : 
#
sub koplib_fb2_get_encoding # @METAGS koplib_fb2_get_encoding
{
    # Extract the value of the first encoding="..." attribute found in the
    # given text (normally the XML declaration at the top of an FB2 file).
    #
    # Parameters: $_[0] - raw description text
    # Returns   : the attribute value exactly as written (not case-folded),
    #             cut to at most 33 characters; "" when there is no such
    #             attribute, when its value is empty, or when its closing
    #             quote is missing on the same line
    # Notes     : the attribute name is matched case-insensitively and the
    #             value may be enclosed in double or single quotes.
    my $lp_description = $_[0]; #reads in the parameter
    my $l_max_len      = 33;    # upper bound kept from the original implementation

    if( !defined($lp_description) )
    {
        return "";
    }; # iff
    if( $lp_description =~ /\sencoding\s*=\s*(["'])(.*?)\1/i )
    {
        return substr($2,0,$l_max_len);
    }; # iff
    return ""; # No proper encoding here
} # koplib_fb2_get_encoding

###############################################################################
#
# Parameters: $lp_description $lp_tagname, $lp_encoding (optional)
# Returns   : $lr_tag_content
# Uses      : koplib_make_win1251
# Globals   : none 
# Errors    : 
#
sub koplib_fb2_get_tag_content # @METAGS koplib_fb2_get_tag_content
{
    # Return the text between the first "<tag>" and the following "</tag>",
    # converted to windows-1251 by koplib_make_win1251.
    #
    # Parameters: $_[0] - text to search (raw description or a part of it)
    #             $_[1] - tag name without angle brackets; matched
    #                     case-insensitively, and only in the attribute-less
    #                     form "<tag>"
    #             $_[2] - (optional) encoding name of the text; when missing or
    #                     shorter than 4 characters it is looked up in the text
    #                     with koplib_fb2_get_encoding
    # Returns   : the tag content; "" when the opening or closing tag is not
    #             found or the element is empty
    my ($lp_description, $lp_tagname, $lp_encoding) = @_;

    # Search in an upper-cased copy, cut from the original text.
    # NOTE(review): this relies on uc() not changing the length of the raw
    # byte string - confirm if decoded (character) strings are ever passed in.
    my $l_description_uc = uc($lp_description);
    my $l_tagname_uc     = uc($lp_tagname);
    my $l_open_tag       = "<"  . $l_tagname_uc . ">";
    my $l_close_tag      = "</" . $l_tagname_uc . ">";

    my $ibeg = index($l_description_uc,$l_open_tag);
    if( $ibeg < 0 )
    {
        return ""; # opening tag not found
    }; # iff
    $ibeg += length($l_open_tag);

    # Start right behind the opening tag, so that the closing tag of an empty
    # element is found and not skipped in favour of a later one.
    my $iend = index($l_description_uc,$l_close_tag,$ibeg);
    if( $iend < 0 )
    {
        return ""; # closing tag not found
    }; # iff

    my $ilen = $iend - $ibeg;
    if( $ilen < 1 )
    {
        return ""; # empty element
    }; # iff

    # The encoding is only needed once there is something to convert.
    if( !defined($lp_encoding) || length($lp_encoding) < 4 )
    {
        ### Wrong encoding? We better get it ourselves
        $lp_encoding = koplib_fb2_get_encoding($lp_description);
    }; # iff

    return koplib_make_win1251(substr($lp_description,$ibeg,$ilen),$lp_encoding);
} # koplib_fb2_get_tag_content

###############################################################################
#
# Parameters: $lp_input $lp_encoding
# Returns   : $lr_out
# Uses      : Encode
# Globals   : none 
# Errors    : 
#
sub koplib_make_win1251 # @METAGS koplib_make_win1251
{
    # Re-encode a byte string from the named encoding into windows-1251.
    #
    # Parameters: $_[0] - input bytes
    #             $_[1] - (optional) encoding name of the input, compared
    #                     case-insensitively; handled names are WINDOWS-1252,
    #                     WINDOWS-1251, ISO-8859-1, KOI8-R and UTF-8
    # Returns   : the windows-1251 bytes; the input is returned unchanged when
    #             the encoding name is missing, shorter than 3 characters, or
    #             not one of the handled names
    # Uses      : Encode (decode, encode)
    my ($lp_input, $lp_encoding) = @_;
    if( !defined($lp_encoding) )
    {
        $lp_encoding = "";
    }; # iff
    if( length($lp_encoding) < 3 )
    {
        return $lp_input; # wrong encoding call, do nothing
    }; # iff

    # Declared encoding name (upper-cased) => codec name understood by Encode.
    # WINDOWS-1252 maps to cp1252; it had been decoded as iso-8859-2, which
    # is a different character set.
    my %l_codec_for = (
        "WINDOWS-1252" => "cp1252",
        "WINDOWS-1251" => "windows-1251",
        "ISO-8859-1"   => "iso-8859-1",
        "KOI8-R"       => "koi8-r",
        "UTF-8"        => "utf-8",
    );
    my $l_codec = $l_codec_for{uc($lp_encoding)};
    if( !defined($l_codec) )
    {
        return $lp_input; # unknown encoding, do nothing
    }; # iff

    # Bytes -> characters -> windows-1251 bytes. The decode step is needed for
    # UTF-8 input as well.
    my $l_input = decode($l_codec,$lp_input);
    return encode("windows-1251",$l_input);
} # koplib_make_win1251