#!/usr/bin/perl
#
#  generic Whois
#
#  (c) 1998-2005 by Michael Holzt <kju@debian.org> and
#                   Lutz Donnerhacke <Lutz.Donnerhacke@Jena.Thur.de>
#
#  Distribution, usage etc. pp. regulated by the current version of GPL.
#
#
#
# History:
# 2005-01-15  kju   Added version output
# 2005-01-13  kju   Added "search" method 'notice' (for giving advice
#                   when no whois/lookup is available)
#                   Added "cgipostcurl" method which uses curl to
#                   fake a referer which is needed for the .nic.ar
#                   (i hate you!)
# 2005-01-06  kju   new routines for searching ipv4 addresses for more
#                   speed improvement
# 2005-01-03  kju   To get rid of the ERX tables which slow down everything,
#                   requery is now ok and will no longer send mails.
#                   (Debian Bug#243148)
#                   Added handling code for .pe (grr) (Debian Bug#264794)
# 2004-03-07  kju   Totally changed the ip address handling code, now uses
#                   CIDR blocks.
# 	            Recognized 'ipas a.b.c.d' as a request to ask team
#		    cymru ip->as mapper whois
# 2004-03-03  kju   Integrated smaller fixes and changes done by lutz.
#                   Removed special code for publicinterestregistry, as
#                   transition is over, and referrers are nolonger used.
#                   Changed handling code for generic whois, can now take
#                   some parameters, removing need for special cases.
# 2003-07-29  kju   fixed publicinterestregistry which is now in transition
#		    phase integrating the whois data
# 2003-02-15  kju   fixed handling of dpkg-old pattern versions
# 2003-01-27  kju   whois.publicinterestregistry.net is now authorative for
#                   .org and needs another special treatment... :-(
#                   plus some minor fixes
# 2003-01-02  f2u   Add "+" for IP address queries to ARIN whois.
#                   Support multiple pattern files.
# 2002-11-01  cord  Small workaround for whois-servers that return ^M.
# 2002-01-17  kju   Reporting if query to upstream whois server failed
#		    (Debian Bug#122535)
# 2000-12-08  kju   Braindead answers from nsiregistry excluded (NSI again!)
# 2000-06-29  lutz  Multiple whois queries (kju)
# 2000-02-29  lutz  Removed obsolete cgibr.
#             lutz  Fixed bug in IP queries.
# 2000-02-08  lutz  Braindead answers from nsiregistry excluded
#                   Workaround for bug@internic removed.
# 1999-12-01  kju   Support for new Internic two-step Whois
# 1999-08-04  lutz  Minor fixed
# 1999-07-01  lutz  Allowed spaces, commas and colons
# 1999-04-12  lutz  Hotfix for problems with LWP::Simple
# 1998-12-21  lutz  solving problems with ripe handles.
# 1998-12-18  lutz  Mirrors possible
#             kju   wwwsgrep with LWP::Simple
#             lutz  multiple options bug fixed
#             kju   whois access made with LWP::Simple
#             lutz  Cosmetic changes,
#                   Configurable,
#                   Dies without line numbers,
#                   Direct server access
#             kju   External Configuration File,
#                   Major rewrite
# 1998-12-17  lutz  Published due to kju
#

use LWP::Simple;

$ENV{'HOME'}='/var/home/whois' unless defined $ENV{'HOME'};

$version = '20050115';

$| = 1;
$confdir = '/etc/gwhois';
$patternfilename = 'pattern';
$patternfilere = qr/^pattern/;
$useLWP = 1;

$sendmail = 1;
if ( -f "/etc/default/gwhois" )
{
  require "/etc/default/gwhois";
}

$step = 1;

while($ARGV[0]) {
  if($ARGV[0] eq '--help' || $ARGV[0] eq '-?') {
    print "gwhois - generic whois\n",
          "Version $version\n\n",
          "Usage: gwhois {options} [query]\n",
          " Try find information about the query (might be multiple words).\n",
	  " If no query is given, use the first line from stdin\n\n",
	  " Options:\n",
	  "   -C dir      setting an alternate configuration directory\n",
	  "               default $confdir\n",
	  "   -h host     selecting a fixed whois server for this query\n",
	  "   -m method:host host   Defining a mirror for a given method and host.\n",
	  "   -L          use lynx -source instead of LWP::Simple\n",
	  "   -v          output version of pattern table(s)\n",
	  "   -?, --help  printing this text\n\n";
    exit;
  } elsif($ARGV[0] eq '-C') {
    shift;
    $confdir = shift;
  } elsif($ARGV[0] eq '-h') {
    shift;
    $fixwhoishost = shift;
  } elsif($ARGV[0] eq '-L') {
    shift;
    $useLWP = 0;
  } elsif($ARGV[0] eq '-m') {
    shift;
    $_ = shift;
    s/://;
    $mirror{$_}=shift;
  } elsif($ARGV[0] eq '-v') {
    print "gwhois - generic whois\n\n",
          "program version:  $version\n",
          "pattern tables:   ";
    foreach $patternfile (&getpatternfiles()) {
      $patternfile = "$confdir/$patternfile";
      if ( open(PATTERN,"<$patternfile") )
      {
        $line = <PATTERN>;
        close(line);
        
        ($version) = $line =~ /#:\s+version\s+(\S+)/;
        $version = 'unknown' if ( $version eq '' );
        print "$version\t($patternfile)\n                  ";
      }
    }  
    print "\n";
    exit(0);
  } else 
  {
    last;
  }
}

# $result = &whoisaccess($host,$port,$query)
sub whoisaccess {
  my ($host,$port,$query) = @_;
  my ($result);

  $query =~ s/ /%20/g;
  if (!defined( $result = $useLWP ? LWP::Simple::get("gopher://$host:$port/0$query")
                    : qx{lynx -source gopher://$host:$port/0$query} ) )
  {
    $result = 'Query to whois server failed.';
  }

  $result =~ s/
//g;
  return $result;
}

# $result = &inicwhoisaccess($host,$port,$query)
sub inicwhoisaccess {
  my ($host,$port,$query) = @_;
  my ($queryresult, $result, $result2);

  $queryresult = whoisaccess($host,$port,"=$query");

  # Result von NSI-Registry auf relevanten Part absuchen
  if ( $queryresult =~ /Name:\s+$query\s/mi ) {
    $result = "-- From: $host:$port\n\n";
    ($host) = $queryresult =~
      /Name:\s+$query\s.*?Whois Server:\s+(.*?)\s/si;
    ($relresult) = $queryresult =~
      /[\r\n]([^\r\n]+\S+\sName:\s+$query\s.*?URL[^\r\n]+)[\r\n]/si;
    
    $result .= "$relresult\n\n-- End --\n\n";
    
    print $step++,". Step: Querying $host:$port with whois.\n\n";
    $port = 43;

    $result .= "-- From: $host:$port\n\n";
    $result .= whoisaccess($host,$port,$query);
  } else {
    $result = "-- From: $host:$port\n\n$queryresult-- End --\n";
  }
  
  return $result;
}

# $result = &wwwsgrep($url,$match)
sub wwwsgrep {
  my ($url,$match) = @_;
  my ($res, $line);
  $line = $useLWP ? LWP::Simple::get($url)
                  : qx(lynx -source $url);
  $line =~ s/\n/ /g;
  if ($line =~ $match) {
    ($res) = $line =~ /$match/s;
  }
  return $res;
}

# ($host, $additional) = &methodpatternregex($query,$host,$additional,$queryline);
sub methodpatternregex {
  my ($query,$host,$additional,$line) = @_;

  my ($namewotld,$tld) = $query =~ /^(.*)\.([^.]*)$/;
  my ($p1,$p2) = $query =~ $line;
  my ($ucq) = uc($query);
  
  $host       =~ s/~query~/$query/;
  $host       =~ s/~ucquery~/$ucq/;
  $host       =~ s/~namewotld~/$namewotld/;
  $host       =~ s/~tld~/$tld/;
  $host       =~ s/~1~/$p1/;
  $host       =~ s/~2~/$p2/;
  
  $additional =~ s/~query~/$query/;
  $additional =~ s/~ucquery~/$ucq/;
  $additional =~ s/~namewotld~/$namewotld/;
  $additional =~ s/~tld~/$tld/;
  $additional =~ s/~1~/$p1/;
  $additional =~ s/~2~/$p2/;
  
  return ($host,$additional);
}

# @patternfiles = &getpatternfiles()
sub getpatternfiles {
  my (@files);

  # Find available pattern files
  opendir(DIR, "$confdir");
  @files = sort(readdir(DIR));
  closedir(DIR);

  # Move "pattern" (main file) to the end, filter non-pattern files (and old -erx ones).
  @files = grep { ($_ =~ /$patternfilere/) 
		  && ($_ ne $patternfilename) && ($_ !~ /dpkg-old/) && ($_ !~ /-erx/) } @files;
  push @files, $patternfilename;

  return (@files);
}


# ($method, $host, $additional) = &getmethodother($query);
sub getmethodother {
  my ($query) = @_;
  my $found=0;
  my ($line,$cline,$method,$host,$additional,@files);


  # Process file until we found a match
  foreach my $patternfile (&getpatternfiles()) {
    $patternfile = "$confdir/$patternfile";

    open(PATTERN,"<$patternfile") || die "Cannot open $patternfile. STOP.\n";
  
    while ( defined($line = <PATTERN>) && (!$found) ) {
      chomp $line;

      if ( $line =~ /^#/ ) {                       # comment
      } elsif ( ($cline) = $line =~ /^:(.*)$/ ) {  # method declaration
        ($method,$host,$additional) = split(/\|/,$cline,3);
      } elsif ( $line ne '' && $query =~ $line ) {
        $found = 1;
        ($host,$additional) = &methodpatternregex($query,$host,$additional,$line);
      }
    }

  }
  if (!$found) {
    return ('','','')
  }
  $host = $mirror{$method.$host} if defined $mirror{$method.$host};
  return ($method,$host,$additional);
}


# %v4pattern = &getpatternv4()
sub getpatternv4 {
  my (%pattern);
  my ($method,$host,$additional,$cline,$line,$rehost,$readditional);

  foreach my $patternfile (&getpatternfiles()) {
    $patternfile = "$confdir/$patternfile";
    open(PATTERN,"<$patternfile") || die "Cannot open $patternfile. STOP.\n";
  
    while ( defined($line = <PATTERN>) && (!$found) ) {
      chomp $line;

      if ( $line =~ /^#/ ) {                       # comment
      } elsif ( ($cline) = $line =~ /^:(.*)$/ ) {  # method declaration
        ($method,$host,$additional) = split(/\|/,$cline,3);
      } elsif ( $line =~ /^=/ ) {
        ($rehost,$readditional) = &methodpatternregex($query,$host,$additional,$line);
        $pattern{$line}{'method'} = $method;
        $pattern{$line}{'host'}   = $rehost;
        $pattern{$line}{'add'}    = $readditional;
      }
    }
  }
  return (%pattern);
}


# ($method, $host, $additional) = &getmethodv4($query);
sub getmethodv4 {
  my ($ipa, $ipb, $ipc, $ipd) = @_;
  my ($ip, $bits, $netmask, $method, $host, $additional, %pattern);
  
  $ip      = $ipa<<24|$ipb<<16|$ipc<<8|$ipd;
  $netmask = 256**4-1;
  %pattern = &getpatternv4();
  
  for ($bits=32;$bits>=0&&$method eq '';$bits--)
  {
    $ip        = $ip & $netmask;
    $netmask <<= 1;

    if ( $bits > 24 ) {
      $cidr = sprintf("%d.%d.%d.%d/$bits", $ip>>24,($ip>>16)&255,($ip>>8)&255,$ip&255);
    } elsif ( $bits > 16 ) {
      $cidr = sprintf("%d.%d.%d/$bits", $ip>>24,($ip>>16)&255,($ip>>8)&255);
    } elsif ( $bits > 8 ) {
      $cidr = sprintf("%d.%d/$bits", $ip>>24,($ip>>16)&255);
    } else {
      $cidr = sprintf("%d/$bits", $ip>>24);
    }

    $method     = $pattern{"=$cidr"}{'method'};
    $host       = $pattern{"=$cidr"}{'host'};
    $additional = $pattern{"=$cidr"}{'add'};
  }
  
  return ($method,$host,$additional);
}


# $result=&doquery($query,$method,$host,$additional);
sub doquery {
  my ($query,$method,$host,$additional) = @_;
  my $result;

  if ($method eq 'wwwgreplv') {
    my ($a,$b) = split(/#/,$host);
    my $tmp;

    if ($b) { 
      $tmp = uc($b);
      $tmp = 'Oth' if $tmp !~ /[A-Z]/;
    } else {
      $tmp = uc($a);
    }
    $method = "wwwsgrep";
    $host = "http://www.nic.lv/DNS/list$tmp.htm";
  }

  if ($method eq 'wwwsgrep') {
    my ($hostname) = $host =~ /http:\/\/([^\/]*)\//;
    my $res;

    print "Querying $hostname with http.\n\n";

    $res = &wwwsgrep($host,$additional);
 
    if ($res ne '') { 
      $result = "Match found:\n$res";
    } else {
      $result = "No match found. This probably means that this domain does not exist.";
    }
  }

  if ($method eq 'wwwpe') {
    # why does every shitty nic need to invent its own standard?
    my ($hostname) = $host =~ /http:\/\/([^\/]*)\//;
    my $res;

    print "Querying $hostname with http.\n";

    $res = &wwwsgrep($host,$additional);
 
    if ($res ne '') { 
      $result = "Match found. Now querying for the domain data.\n\n";
      $result .= &doquery($query,'cgi','http://www.nic.pe/' . $res, '');
    } else {
      $result = "No match found. This probably means that this domain does not exist.";
    }
  }

  if ($method eq 'wwwbm') {
    # why does every shitty nic need to invent its own standard?
    my ($hostname) = $host =~ /http:\/\/([^\/]*)\//;
    my $res;

    print "Querying $hostname with http.\n";

    # Get session URL
    $res = &wwwsgrep($host,$additional);
    
    # Get lookup
    $result = &doquery($query,'cgipost',"http://$hostname$res",
      "ADOM++++++=$query&_PROCESS=BMWHO+&_FUNCTION=BMWHO2+",
    ); 
  }

  if ($method eq 'wwwbz') {
    # why does every shitty nic need to invent its own standard?
    my $hexstring;
    $hexstring = $query;
    $hexstring =~ s/./sprintf("%02x",ord("$&"))/eg;
    
    $result = &doquery($query,'cgi',$host.$hexstring,'');
  }

  if ($method =~ /^whois(|jp|arin)$/) {
    my ($port,$noipprefix,$ipprefix,$trailer,$strip,$parameter,$outquery);

    $port       = 43;
    $noipprefix = '';
    $ipprefix   = '';
    $trailer    = '';
    $strip      = '';

    foreach $parameter (split('\|', $additional)) {
      $port       = $1 if ( $parameter =~ /port=(\d+)/ );
      $noipprefix = $1 if ( $parameter =~ /noipprefix=(.*)/ );
      $ipprefix   = $1 if ( $parameter =~ /ipprefix=(.*)/ );
      $trailer    = $1 if ( $parameter =~ /trailer=(.*)/ );
      $strip      = $1 if ( $parameter =~ /strip=(.*)/ );
    }

    print "Querying $host:$port with whois.\n";
    
    if ( $query =~ /^\d+\.\d+\.\d+\.\d+$/ ) 
    {
      $outquery = $ipprefix . $query . $trailer . "\n";
    } else
    {
      $outquery = $noipprefix . $query . $trailer . "\n";
    }
    
    $outquery =~ s/$strip//g if ( $strip ne '' );

    $result = &whoisaccess($host,$port,$outquery);

    if ( $result =~ /ReferralServer: whois:\/\/(.*):(\d+)/mi )
    {
      print "Querying $1:$2 with whois.\n";
      $result = &whoisaccess($1,$2,$query);
    } elsif ( $result =~ /ReferralServer: whois:\/\/(.*)/mi )
    {
      print "Querying $1:43 with whois.\n";
      $result = &whoisaccess($1,43,$query);
    }

    print "\n";
  } 

  if ($method eq 'inicwhois' ) {
    my $port = $additional || 43;
    print $step++, ". Step: Querying $host:$port with whois.\n";
    $query .= "\n";
    $result = &inicwhoisaccess($host,$port,$query);
  } 

  if ($method eq 'cgi') {
    my ($hostname) = $host =~ /http:\/\/([^\/]*)\//;
    print "Querying $hostname with cgi.\n\n";
#!!
    print "$host\n";

    $result = `lynx -dump "$host"`;
  } 

  if ($method eq 'cgipost') {
    my ($hostname) = $host =~ /http:\/\/([^\/]*)\//;
    print "Querying $hostname with cgi.\n\n";
#!!   
    print "echo -e '$additional\n---' | lynx -dump -post_data '$host'\n";

    $result = `echo -e "$additional\n---" | lynx -dump -post_data "$host"`;
  } 

  if ($method eq 'cgipostcurl') {
    my ($hostname) = $host =~ /https{0,1}:\/\/([^\/]*)\//;
    print "Querying $hostname with cgi.\n\n";
#    print "$additional\n"; #!!
#    print "curl --stderr /dev/null -e $host --data '$additional' $host | lynx -dump -stdin\n";
    $result = `curl --stderr /dev/null -e "$host" --data "$additional" "$host" | lynx -dump -stdin`;
  } 

  if ($method eq 'cgihttps') {
    my ($hostname) = $host =~ /https:\/\/([^\/]*)\//;
    print "Querying $hostname with cgi.\n\n";
#    print "$additional\n"; #!!
#    print "curl --stderr /dev/null $host | lynx -dump -stdin\n";
    $result = `curl --insecure --stderr /dev/null "$host" | lynx -dump -stdin`;
  } 

  if ($method eq 'notice') {
    $result = "\n\nNo lookup service available for your query '$query'.\ngwhois remarks: " . $additional . "\n\n";
  }

  if ($host =~ /arin/) {
    if ($result =~ /Maintainer: RIPE/) { 
      $result = &doquery($query,'whois','whois.ripe.net','');
    } elsif ($result =~ /Maintainer: AP/) { 
      $result = &doquery($query,'whois','whois.apnic.net','');
    } 
  }
 
  if ($host =~ /apnic/) {
    if ($result =~ /netname: AUNIC-AU/) { 
      $result = &doquery($query,'whois','whois.aunic.net','');
    } elsif ($result =~ /netname: JPNIC-JP/) { 
      $result = &doquery($query,'whoisjp','whois.nic.ad.jp','');
    } 
  }
 
  if ($host =~ /ripe/ && $result =~ /remarks:\s+whois -h (\S+)/) {
    $result = &doquery($query,'whois',$1,'');
  }         
     
  if (($host =~ /internic/) && ($result =~ /No match for/) &&
      ($query !~ /\.(arpa|com|edu|net|org)$/) ) {
    my $result1=&doquery($query,'whois','whois.ripe.net','');
    if ($result1 !~ /No entries found/) {
      $result = $result1; 
    }
  }

  return $result;
}

sub main {
  my $query = lc shift;
  chomp $query;

  $query =~ s/^\s+//;
  $query =~ s/\s+$//;
  $query =~ y/[\xA0-\xFF]a-z0-9:.,+_ -//cd;
  $query =~ s/\.$//;

  my ($method,$host,$additional);
  print "Process query: '$query'\n";
  if ( $fixwhoishost )
  {
    ($method,$host,$additional) = ('whois',$fixwhoishost,'');
  } else
  {
    if ($query !~ /[^0-9\.]/) { # ipv4
      my ($a, $b, $c, $d, $e);
      ($a, $b, $c, $d, $e) = $query =~ /(\d+)\.(\d+)\.(\d+)\.(\d+)(.*)/;
      if ($a > 255 || $b > 255 || $c > 255 || $d > 255 || $e ne '') {
        die "$query is no valid IPv4-Address and no valid Domainname.\n";
      }
      print "Query recognized as IPv4.\n";
    
      ($method,$host,$additional) = &getmethodv4($a,$b,$c,$d);
    } elsif ( lc($query) !~ /[^0-9a-f:]/ ) { # ipv6
      # check and correct v6 address
      die "$query is an invalid IPv6-Address.\n" if ( $query =~ /[0-9a-f]{5}/ || $query =~ /:::/ );
      $query =~ s/:?$/::/ if ( $query !~ /(.*:){7}/ && $query !~ /::/ );

      print "Query recognized as IPv6.\n";

      ($method,$host,$additional) = &getmethodother($query);
    } else
    {
      ($method,$host,$additional) = &getmethodother($query);
    }
  }

  die "I don't know where to query that. STOP.\n"
    if ($method eq '');  

  my $result = &doquery($query,$method,$host,$additional);
  print $result,"\n\n";

  print "\n-- \n  To resolve one of the above handles: ";
 
  if ($method =~ /whois/) {
    print "whois -h $host";
    print ":$1" if ( $additional =~ /port=(\d+)/ );
    print " HANDLE\n";
  }

  if ($method eq "cgipost") {
    print "\n     POST $host\n";
    print "     $additional\n";
  }

  if ($method eq "cgi") {
    print "\n     $host\n";
  }

  if ($method eq "wwwgrep") {
    print "\n     hmm. not sure.\n";
  }

  print "  OTOH offical handles should be recognised directly.\n";
  print "  Please report errors or misfits via the debian bug tracking system.\n";
}

if($ARGV[0]) {
  $_ = join(' ',@ARGV);
} else {
  $_ = <>;
  chomp;
}
&main($_);
