#!/usr/bin/env perl
# Program version. It must be a quoted string: an unquoted literal with two
# dots (0.5.2) is parsed by Perl as a v-string, i.e. the three characters
# chr(0), chr(5), chr(2), not the text "0.5.2".
$version="0.5.2";
################################################################################
#
# urlmafor - automated executor of urlmon in forking mode
#
# Copyright (C) 2001, 2006-2007, 2011 by Dimitar Ivanov 
#                                       <dimitar.ivanov@mirendom.net>
#
# urlmon is Copyright (C) by Jeremy Impson 
#
# urlmafor executes 'urlmon' in forking mode ("-F" option) on a list
# of database files. It allocates automatically and individually for each
# database the number of processes to be forked.
#
# The arguments can be any valid file or directory name, or any expression
# accepted by 'find', for example "-name '.*'".
#
################################################################################
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
################################################################################
#
use Fcntl;
use File::Basename;

# Print a one-line usage summary to STDOUT. The program name shown is the
# basename of the path this script was invoked with.
sub _Usage_ ()
{
   my $self = basename($0);
   print "Usage: $self [-s (single proc)] <dir|file>\n";
}

# Strictures are lexically scoped: they apply from here to the end of the
# file and leave the code above this line untouched.
use strict;
use warnings;

# Number of urlmon children to fork for one database.
#   $nrurls  - number of URL entries found in the database
#   $upc     - minimum number of URLs each child should get
#   $maxkids - upper bound on the number of children
# Returns int($nrurls / $upc) clamped to the range 1 .. $maxkids, so the
# maximum is only reached once there are at least $upc * $maxkids URLs.
sub _kids_for
{
   my ($nrurls, $upc, $maxkids) = @_;
   my $kids = int( $nrurls / $upc );
   $kids = $maxkids if $kids > $maxkids;
   $kids = 1        if $kids < 1;
   return $kids;
}

# Unbuffered STDOUT, so that each "DO:" line is written before urlmon starts
# writing to the same stream.
$| = 1;

   # A target is required; testing for presence/emptiness (not truth) keeps a
   # file literally named "0" usable
if( ! @ARGV || length($ARGV[0]) == 0 ) { _Usage_(); exit; }
   # Don't fork, do sequential processing
my $single = 0;
if( $ARGV[0] eq "-s" ) { $single = 1; shift @ARGV; }
   # Check whether directory specified
if( ! @ARGV || length($ARGV[0]) == 0 ) { _Usage_(); exit; }

# Locate urlmon: prefer an executable in the current directory, otherwise
# ask 'which' to search PATH.
my $urlmon = "./urlmon";
if( ! ( -f $urlmon && -x $urlmon ) ) {
   $urlmon = `which urlmon`;
   $urlmon = "" if ! defined $urlmon;
   chomp $urlmon;
}
   # Without this check an empty path would leave 'grep' below reading stdin
die "Can not find an executable urlmon in . or in PATH\n"
   if ! ( length($urlmon) && -x $urlmon );

# Major version of urlmon, taken from its '$version="x.y"' line. It is
# compared with the first line of every database further below.
my $urlmonver = `grep '^\$version' $urlmon |cut -f2 -d'"' |cut -f1 -d'.'`;
$urlmonver = "" if ! defined $urlmonver;
chomp $urlmonver;

my $upc     = 3;           # Minimum URLs per child
my $maxkids = 8;           # Maximum children
my $urlstr  = "^URL";      # Pattern of a line holding one URL entry

foreach my $target (@ARGV) {
      # $target is deliberately passed through the shell unquoted, because it
      # may be a 'find' expression. The type tests are grouped so that such an
      # expression applies to symbolic links as well as to regular files.
   my @list = `find $target -maxdepth 1 \\( -type f -o -type l \\)`;
   chomp @list;
   foreach my $file (@list) {
      sysopen(my $in, $file, O_RDONLY) or die "Can not open file $file: $!\n";
      my @urls = <$in>;
      close ($in) or die "Can not close file $file: $!\n";
      my $nrurls = grep { /$urlstr/ } @urls;   # count the number of urls
         # Skip files without URLs and files whose first line does not carry
         # the urlmon major version
      next if ($nrurls == 0 || $urls[0] !~ /^# \Q$urlmonver\E/);
      my $kids = $single ? 1 : _kids_for( $nrurls, $upc, $maxkids );
      print "DO: $file\n";
         # List form: no shell is involved, so file names containing spaces
         # or shell metacharacters reach urlmon unchanged
      my $status = system( $urlmon, "-lc", "-F", $kids, "-f", $file );
      warn "Can not execute $urlmon: $!\n" if $status == -1;
   }
}
exit 0;
