#!/usr/bin/perl
#
#  generic Whois
#
#  (c) 1998-2007 by Michael Holzt
#  Some early parts by Lutz Donnerhacke <Lutz.Donnerhacke@Jena.Thur.de>
#
#  Distribution, usage etc. pp. regulated by the current version of GPL.
#
#
#
# History:
# 2008-02-18  kju   added handling for .bi and .gh (never a shortage
#                   of nics wo need special handling)
# 2007-09-03  kju   made output from verisign-grs more verbose
# 2007-02-17  kju   added mechanism for modifying redirects (see readme.ripe)
# 2006-12-02  kju   new method multiple for doing multiple queries
# 2005-09-25  kju   replaced 'noipprefix' and 'ipprefix' by 'prefix'
# 2005-09-25  kju   query string is no longer forced lower case (Debian Bug#324669)
# 2005-01-15  kju   Added version output
# 2005-01-13  kju   Added "search" method 'notice' (for giving advice
#                   when no whois/lookup is available)
#                   Added "cgipostcurl" method which uses curl to
#                   fake a referer which is needed for the .nic.ar
#                   (i hate you!)
# 2005-01-06  kju   new routines for searching ipv4 addresses for more
#                   speed improvement
# 2005-01-03  kju   To get rid of the ERX tables which slow down everything,
#                   requery is now ok and will no longer send mails.
#                   (Debian Bug#243148)
#                   Added handling code for .pe (grr) (Debian Bug#264794)
# 2004-03-07  kju   Totally changed the ip address handling code, now uses
#                   CIDR blocks.
# 	            Recognized 'ipas a.b.c.d' as a request to ask team
#		    cymru ip->as mapper whois
# 2004-03-03  kju   Integrated smaller fixes and changes done by lutz.
#                   Removed special code for publicinterestregistry, as
#                   transition is over, and referrers are nolonger used.
#                   Changed handling code for generic whois, can now take
#                   some parameters, removing need for special cases.
# 2003-07-29  kju   fixed publicinterestregistry which is now in transition
#		    phase integrating the whois data
# 2003-02-15  kju   fixed handling of dpkg-old pattern versions
# 2003-01-27  kju   whois.publicinterestregistry.net is now authorative for
#                   .org and needs another special treatment... :-(
#                   plus some minor fixes
# 2003-01-02  f2u   Add "+" for IP address queries to ARIN whois.
#                   Support multiple pattern files.
# 2002-11-01  cord  Small workaround for whois-servers that return ^M.
# 2002-01-17  kju   Reporting if query to upstream whois server failed
#		    (Debian Bug#122535)
# 2000-12-08  kju   Braindead answers from nsiregistry excluded (NSI again!)
# 2000-06-29  lutz  Multiple whois queries (kju)
# 2000-02-29  lutz  Removed obsolete cgibr.
#             lutz  Fixed bug in IP queries.
# 2000-02-08  lutz  Braindead answers from nsiregistry excluded
#                   Workaround for bug@internic removed.
# 1999-12-01  kju   Support for new Internic two-step Whois
# 1999-08-04  lutz  Minor fixed
# 1999-07-01  lutz  Allowed spaces, commas and colons
# 1999-04-12  lutz  Hotfix for problems with LWP::Simple
# 1998-12-21  lutz  solving problems with ripe handles.
# 1998-12-18  lutz  Mirrors possible
#             kju   wwwsgrep with LWP::Simple
#             lutz  multiple options bug fixed
#             kju   whois access made with LWP::Simple
#             lutz  Cosmetic changes,
#                   Configurable,
#                   Dies without line numbers,
#                   Direct server access
#             kju   External Configuration File,
#                   Major rewrite
# 1998-12-17  lutz  Published due to kju
#

use LWP::Simple;

$ENV{'HOME'}='/var/home/whois' unless defined $ENV{'HOME'};

$version = '20080218';

$| = 1;
$confdir = '/etc/gwhois';
$patternfilename = 'pattern';
$patternfilere = qr/^pattern/;
$useLWP = 1;

$sendmail = 1;
if ( -f "/etc/default/gwhois" )
{
  require "/etc/default/gwhois";
}

$step = 1;

while($ARGV[0]) {
  if($ARGV[0] eq '--help' || $ARGV[0] eq '-?') {
    print "gwhois - generic whois\n",
          "Version $version\n\n",
          "Usage: gwhois {options} [query]\n",
          " Try find information about the query (might be multiple words).\n",
	  " If no query is given, use the first line from stdin\n\n",
	  " Options:\n",
	  "   -C dir      setting an alternate configuration directory\n",
	  "               default $confdir\n",
	  "   -h host     selecting a fixed whois server for this query\n",
	  "   -m method:host host   Defining a mirror for a given method and host.\n",
	  "   -L          use lynx -source instead of LWP::Simple\n",
	  "   -v          output version of pattern table(s)\n",
	  "   -?, --help  printing this text\n\n";
    exit;
  } elsif($ARGV[0] eq '-C') {
    shift;
    $confdir = shift;
  } elsif($ARGV[0] eq '-h') {
    shift;
    $fixwhoishost = shift;
  } elsif($ARGV[0] eq '-L') {
    shift;
    $useLWP = 0;
  } elsif($ARGV[0] eq '-m') {
    shift;
    $_ = shift;
    s/://;
    $mirror{$_}=shift;
  } elsif($ARGV[0] eq '-v') {
    print "gwhois - generic whois\n\n",
          "program version:  $version\n",
          "pattern tables:   ";
    foreach $patternfile (&getpatternfiles()) {
      $patternfile = "$confdir/$patternfile";
      if ( open(PATTERN,"<$patternfile") )
      {
        $line = <PATTERN>;
        close(PATTERN);
        
        ($version) = $line =~ /#:\s+version\s+(\S+)/;
        $version = 'unknown' if ( $version eq '' );
        print "$version\t($patternfile)\n                  ";
      }
    }  
    print "\n";
    exit(0);
  } else 
  {
    last;
  }
}

# $result = &whoisaccess($host,$port,$query)
sub whoisaccess {
  my ($host,$port,$query) = @_;
  my ($result);

  $query =~ s/ /%20/g;
  if (!defined( $result = $useLWP ? LWP::Simple::get("gopher://$host:$port/0$query")
                    : qx{lynx -source gopher://$host:$port/0$query} ) )
  {
    $result = 'Query to whois server failed.';
  }

  $result =~ s/
//g;
  
  return $result;
}

# $result = &inicwhoisaccess($host,$port,$query)
sub inicwhoisaccess {
  my ($host,$port,$query) = @_;
  my ($queryresult, $result, $result2);

  $queryresult = whoisaccess($host,$port,"=$query");

  # Result von NSI-Registry auf relevanten Part absuchen
  if ( $queryresult =~ /Name:\s+$query\s/mi ) {
    $result = "-- From: $host:$port\n\n";
    ($host) = $queryresult =~
      /Name:\s+$query\s.*?Whois Server:\s+(.*?)\s/si;
    ($relresult) = $queryresult =~
      /[\r\n]([^\r\n]+\S+\sName:\s+$query\s.*?Expiration Date:[^\r\n]+)[\r\n]/si;
    
    $result .= "$relresult\n\n-- End --\n\n";
    
    print $step++,". Step: Querying $host:$port with whois.\n\n";
    $port = 43;

    $result .= "-- From: $host:$port\n\n";
    $result .= whoisaccess($host,$port,$query);
  } else {
    $result = "-- From: $host:$port\n\n$queryresult-- End --\n";
  }
  
  return $result;
}

# $result = &wwwsgrep($url,$match)
sub wwwsgrep {
  my ($url,$match) = @_;
  my ($res, $line);
  $line = $useLWP ? LWP::Simple::get($url)
                  : qx(lynx -source $url);
  $line =~ s/\n/ /g;
  if ($line =~ $match) {
    ($res) = $line =~ /$match/s;
  }
  return $res;
}

# ($host, $additional) = &methodpatternregex($query,$host,$additional,$queryline);
sub methodpatternregex {
  my ($query,$host,$additional,$line) = @_;

  my ($namewotld,$tld) = $query =~ /^(.*)\.([^.]*)$/;
  my ($p1,$p2) = $query =~ $line;
  my ($ucq) = uc($query);
  
  $host       =~ s/~query~/$query/;
  $host       =~ s/~ucquery~/$ucq/;
  $host       =~ s/~namewotld~/$namewotld/;
  $host       =~ s/~tld~/$tld/;
  $host       =~ s/~1~/$p1/;
  $host       =~ s/~2~/$p2/;
  
  $additional =~ s/~query~/$query/;
  $additional =~ s/~ucquery~/$ucq/;
  $additional =~ s/~namewotld~/$namewotld/;
  $additional =~ s/~tld~/$tld/;
  $additional =~ s/~1~/$p1/;
  $additional =~ s/~2~/$p2/;
  
  return ($host,$additional);
}

# @patternfiles = &getpatternfiles()
sub getpatternfiles {
  my (@files);

  # Find available pattern files
  opendir(DIR, "$confdir");
  @files = sort(readdir(DIR));
  closedir(DIR);

  # Move "pattern" (main file) to the end, filter non-pattern files (and old -erx ones).
  @files = grep { ($_ =~ /$patternfilere/) 
		  && ($_ ne $patternfilename) && ($_ !~ /\.dpkg-new$/) && ($_ !~ /\.dpkg-old$/) && ($_ !~ /\.orig$/) && ($_ !~ /-erx/) } @files;
  push @files, $patternfilename;

  return (@files);
}


# ($method, $host, $additional) = &getmethodother($query);
sub getmethodother {
  my ($query) = @_;
  my $found=0;
  my ($line,$cline,$method,$host,$additional,@files);


  # Process file until we found a match
  foreach my $patternfile (&getpatternfiles()) {
    $patternfile = "$confdir/$patternfile";

    open(PATTERN,"<$patternfile") || die "Cannot open $patternfile. STOP.\n";
  
    while ( defined($line = <PATTERN>) && (!$found) ) {
      chomp $line;

      if ( $line =~ /^#/ ) {                       # comment
      } elsif ( ($cline) = $line =~ /^:(.*)$/ ) {  # method declaration
        ($method,$host,$additional) = split(/\|/,$cline,3);
      } elsif ( $line ne '' && $query =~ /$line/i ) {
        $found = 1;
        ($host,$additional) = &methodpatternregex($query,$host,$additional,$line);
      }
    }

  }
  if (!$found) {
    return ('','','')
  }
  $host = $mirror{$method.$host} if defined $mirror{$method.$host};
  return ($method,$host,$additional);
}


# %v4pattern = &getpatternv4()
sub getpatternv4 {
  my (%pattern);
  my ($method,$host,$additional,$cline,$line,$rehost,$readditional);

  foreach my $patternfile (&getpatternfiles()) {
    $patternfile = "$confdir/$patternfile";
    open(PATTERN,"<$patternfile") || die "Cannot open $patternfile. STOP.\n";
  
    while ( defined($line = <PATTERN>) && (!$found) ) {
      chomp $line;

      if ( $line =~ /^#/ ) {                       # comment
      } elsif ( ($cline) = $line =~ /^:(.*)$/ ) {  # method declaration
        ($method,$host,$additional) = split(/\|/,$cline,3);
      } elsif ( $line =~ /^=/ ) {
        ($rehost,$readditional) = &methodpatternregex($query,$host,$additional,$line);
        $pattern{$line}{'method'} = $method;
        $pattern{$line}{'host'}   = $rehost;
        $pattern{$line}{'add'}    = $readditional;
      }
    }
  }
  return (%pattern);
}


# ($method, $host, $additional) = &getmethodv4($query);
sub getmethodv4 {
  my ($ipa, $ipb, $ipc, $ipd) = @_;
  my ($ip, $bits, $netmask, $method, $host, $additional, %pattern);
  
  $ip      = $ipa<<24|$ipb<<16|$ipc<<8|$ipd;
  $netmask = 256**4-1;
  %pattern = &getpatternv4();
  
  for ($bits=32;$bits>=0&&$method eq '';$bits--)
  {
    $ip        = $ip & $netmask;
    $netmask <<= 1;

    if ( $bits > 24 ) {
      $cidr = sprintf("%d.%d.%d.%d/$bits", $ip>>24,($ip>>16)&255,($ip>>8)&255,$ip&255);
    } elsif ( $bits > 16 ) {
      $cidr = sprintf("%d.%d.%d/$bits", $ip>>24,($ip>>16)&255,($ip>>8)&255);
    } elsif ( $bits > 8 ) {
      $cidr = sprintf("%d.%d/$bits", $ip>>24,($ip>>16)&255);
    } else {
      $cidr = sprintf("%d/$bits", $ip>>24);
    }
 
    $method     = $pattern{"=$cidr"}{'method'};
    $host       = $pattern{"=$cidr"}{'host'};
    $additional = $pattern{"=$cidr"}{'add'};
  }
  
  return ($method,$host,$additional);
}

  
# $rendered = &render_html($html);
sub render_html
{
  my ($html) = @_;

  $html =~ s|\n||g;

  $html =~ s|<br/{0,1}>|\n|gsi;
  $html =~ s|<br\s.*?>|\n|gsi;

  $html =~ s|<p/{0,1}>|\n|gsi;
  $html =~ s|<p\s.*?>|\n|gsi;

  $html =~ s|<tr/{0,1}>|\n|gsi;
  $html =~ s|<tr\s.*?>|\n|gsi;
  
  $html =~ s|<script.*?</script>||gsi;
  $html =~ s|\<.*?\>||gsi;
  $html =~ s|&nbsp;| |gsi;
  $html =~ s| \t| |gsi;
  $html =~ s|\s*\n\s*\n|\n|gsi;
  $html =~ s|^\s*||gm;
  
  return($html);
}


sub redirectwhois 
{
  my ($query,$host) = @_;

  # check for query modifier (if any)
  my ($modmethod, $modhost, $modadditional) = &getmethodother("redirect:$host");

  return &doquery($query,$modmethod,$modhost,$modadditional)
    if ( $modmethod ne 'none');
    
  return &doquery($query,'whois',$host,'')
}


# $result=&doquery($query,$method,$host,$additional);
sub doquery {
  my ($query,$method,$host,$additional) = @_;
  my $result;

  if ($method eq 'multiple') {
    my $triple;
    foreach $triple ( split(/:::/, $additional) )
    {
      ($method,$host,$additional) = split(/::/, $triple);
      $result .= &doquery($query, $method, $host, $additional);
      $result .= "\n\n------\n\n";
    }
    
    # done
    $method = '';
  } 


  if ($method eq 'wwwgreplv') {
    my ($a,$b) = split(/#/,$host);
    my $tmp;

    if ($b) { 
      $tmp = uc($b);
      $tmp = 'Oth' if $tmp !~ /[A-Z]/;
    } else {
      $tmp = uc($a);
    }
    $method = "wwwsgrep";
    $host = "http://www.nic.lv/DNS/list$tmp.htm";
  }

  if ($method eq 'wwwsgrep') {
    my ($hostname) = $host =~ /http:\/\/([^\/]*)\//;
    my $res;

    print "Querying $hostname with http.\n\n";

    $res = &wwwsgrep($host,$additional);
 
    if ($res ne '') { 
      $result = "Match found:\n$res";
    } else {
      $result = "No match found. This probably means that this domain does not exist.";
    }
  }

  if ($method eq 'wwwpe') {
    # why does every shitty nic need to invent its own standard?
    my ($hostname) = $host =~ /http:\/\/([^\/]*)\//;
    my $res;

    print "Querying $hostname with http.\n";

    $res = &wwwsgrep($host,$additional);
 
    if ($res ne '') { 
      $result = "Match found. Now querying for the domain data.\n\n";
      $result .= &doquery($query,'cgi','http://www.nic.pe/' . $res, '');
    } else {
      $result = "No match found. This probably means that this domain does not exist.";
    }
  }

  if ($method eq 'wwwgh') {
    # why does every shitty nic need to invent its own standard?
    my ($hostname) = $host =~ /http:\/\/([^\/]*)\//;
    my $res;

    ($dom,$sld) = $query =~ /(.*?)\.(com|org|gov|edu|biz)\.gh$/;
    $res = &doquery('','cgipost','http://www.nic.gh/customer/result_c.php',
      "r_cdm=$dom&r_dom_slvl=$sld&Submit=Search");

    if ( $res =~ m|(customer/displayresult_c.php\?id=\d+)|s )
    {
      $result = "Match found. Now querying for the domain data.\n\n";
      $result .= &doquery($query,'cgi',"http://www.nic.gh/$1");
    } else {
      $result = "No match found. This probably means that this domain does not exist.";
    }
  }

  if ($method eq 'wwwbi') {
    # why does every shitty nic need to invent its own standard?
    my ($hostname) = $host =~ /http:\/\/([^\/]*)\//;
    my $res;

    print "Querying $hostname with http.\n";

    # Get session URL
    $res = &wwwsgrep($host,$additional);
    

#    print "$res\n"; exit;
    # Get lookup
    $result = &doquery($query,'cgi',"$host?card=$res&f_1.qdomain=$query&f_1.type=domain&f_1.q=Search");
  }

  if ($method eq 'wwwbm') {
    # why does every shitty nic need to invent its own standard?
    my ($hostname) = $host =~ /http:\/\/([^\/]*)\//;
    my $res;

    print "Querying $hostname with http.\n";

    # Get session URL
    $res = &wwwsgrep($host,$additional);
    
    # Get lookup
    $result = &doquery($query,'cgipost',"http://$hostname$res",
      "ADOM++++++=$query&_PROCESS=BMWHO+&_FUNCTION=BMWHO2+",
    ); 
  }


  if ($method eq 'wwwbz') {
    # why does every shitty nic need to invent its own standard?
    my $hexstring;
    $hexstring = $query;
    $hexstring =~ s/./sprintf("%02x",ord("$&"))/eg;
    
    $result = &doquery($query,'cgi',$host.$hexstring,'');
  }

  if ($method =~ /^whois(|jp|arin)$/) {
    my ($port,$trailer,$strip,$parameter,$outquery,$prefix);

    $port       = 43;
    $noipprefix = '';
    $ipprefix   = '';
    $trailer    = '';
    $strip      = '';

    foreach $parameter (split('\|', $additional)) {
      $port       = $1 if ( $parameter =~ /port=(\d+)/ );
      $trailer    = $1 if ( $parameter =~ /trailer=(.*)/ );
      $strip      = $1 if ( $parameter =~ /strip=(.*)/ );
      $prefix     = $1 if ( $parameter =~ /prefix=(.*)/ );
    }
 
    print "Querying $host:$port with whois.\n";
    
    $outquery = $prefix . $query . $trailer . "\n";
    $outquery =~ s/$strip//g if ( $strip ne '' );

    $result = &whoisaccess($host,$port,$outquery);

    if ( $result =~ /ReferralServer: whois:\/\/(.*):43/mi ||
         $result =~ /ReferralServer: whois:\/\/(.*)/mi )
    {
      $result = &redirectwhois($query,$1);
      $host = '';
    } elsif ( $result =~ /ReferralServer: whois:\/\/(.*):(\d+)/mi )
    {
      $result = &whoisaccess($1,$2,$query);
    }
    
    print "\n";
    
  } 

  if ($method eq 'inicwhois' ) {
    my $port = $additional || 43;
    print $step++, ". Step: Querying $host:$port with whois.\n";
    $query .= "\n";
    $result = &inicwhoisaccess($host,$port,$query);
  } 

  if ($method eq 'cgi') {
    my ($hostname) = $host =~ /http:\/\/([^\/]*)\//;
    print "Querying $hostname with cgi.\n\n";
#!!
#    print "$host\n";

#    $result = `lynx -dump "$host"`;
  
    my $html = `lynx -source "$host"`;
    $result = &render_html($html);
  } 

  if ($method eq 'cgipost') {
    my ($hostname) = $host =~ /http:\/\/([^\/]*)\//;
    print "Querying $hostname with cgi.\n\n";
#!!   
#    print "echo -e '$additional\n---' | lynx -dump -post_data '$host'\n";

    $result = `echo -e "$additional\n---" | lynx -dump -post_data "$host"`;
  } 

  if ($method eq 'cgipostcurl') {
    my ($hostname) = $host =~ /https{0,1}:\/\/([^\/]*)\//;
    print "Querying $hostname with cgi.\n\n";
#    print "$additional\n"; #!!
#    print "curl --stderr /dev/null -e $host --data '$additional' $host | lynx -dump -stdin\n";
    $result = `curl --stderr /dev/null -e "$host" --data "$additional" "$host" | lynx -dump -stdin`;
  } 

  if ($method eq 'cgihttps') {
    my ($hostname) = $host =~ /https:\/\/([^\/]*)\//;
    print "Querying $hostname with cgi.\n\n";
#    print "$additional\n"; #!!
#    print "curl --stderr /dev/null $host | lynx -dump -stdin\n";
#    $result = `curl --insecure --stderr /dev/null "$host" | lynx -dump -stdin`;
    my $html = `curl --insecure --stderr /dev/null "$host"`;
    print &render_html($html);
  } 

  if ($method eq 'notice') {
    $result = "\n\nNo lookup service available for your query '$query'.\ngwhois remarks: " . $additional . "\n\n";
  }

  if ($host =~ /arin/) {
    if ($result =~ /Maintainer: RIPE/) { 
      $result = &redirectwhois($query,'whois.ripe.net');
    } elsif ($result =~ /Maintainer: AP/) { 
      $result = &redirectwhois($query,'whois.apnic.net');
    } 
  }
 
  if ($host =~ /apnic/) {
    if ($result =~ /netname: AUNIC-AU/) { 
      $result = &redirectwhois($query,'whois.aunic.net');
    } elsif ($result =~ /netname: JPNIC-JP/) { 
      $result = &redirectwhois($query,,'whois.nic.ad.jp');
    } 
  }
 
  if ($host =~ /ripe/ && $result =~ /remarks:\s+whois -h (\S+)/) {
    $result = &redirectwhois($query,$1);
  }         
     
  if (($host =~ /internic/) && ($result =~ /No match for/) &&
      ($query !~ /\.(arpa|com|edu|net|org)$/) ) {
    my $result1=&redirectwhois($query,'whois.ripe.net');
    if ($result1 !~ /No entries found/) {
      $result = $result1; 
    }
  }

  return $result;
}

sub main {
  my $query = shift;
  chomp $query;

  $query =~ s/^\s+//;
  $query =~ s/\s+$//;
  $query =~ y/[\xA0-\xFF]a-zA-Z0-9:.,+_ -//cd;
  $query =~ s/\.$//;

  my ($method,$host,$additional);
  print "Process query: '$query'\n";
  if ( $fixwhoishost )
  {
    ($method,$host,$additional) = ('whois',$fixwhoishost,'');
  } else
  {
    if ($query !~ /[^0-9\.]/) { # ipv4
      my ($a, $b, $c, $d, $e);
      ($a, $b, $c, $d, $e) = $query =~ /(\d+)\.(\d+)\.(\d+)\.(\d+)(.*)/;
      if ($a > 255 || $b > 255 || $c > 255 || $d > 255 || $e ne '') {
        die "$query is no valid IPv4-Address and no valid Domainname.\n";
      }
      print "Query recognized as IPv4.\n";
    
      ($method,$host,$additional) = &getmethodv4($a,$b,$c,$d);
    } elsif ( lc($query) !~ /[^0-9a-f:]/i ) { # ipv6
      # check and correct v6 address
      die "$query is an invalid IPv6-Address.\n" if ( $query =~ /[0-9a-f]{5}/ || $query =~ /:::/ );
      $query =~ s/:?$/::/ if ( $query !~ /(.*:){7}/ && $query !~ /::/ );

      print "Query recognized as IPv6.\n";

      ($method,$host,$additional) = &getmethodother($query);
    } else
    {
      ($method,$host,$additional) = &getmethodother($query);
    }
  }

  die "I don't know where to query that. STOP.\n"
    if ($method eq '');  

  my $result = &doquery($query,$method,$host,$additional);
  print $result,"\n\n";

  print "\n-- \n  To resolve one of the above handles: ";
 
  if ($method =~ /whois/) {
    print "whois -h $host";
    print ":$1" if ( $additional =~ /port=(\d+)/ );
    print " HANDLE\n";
  }

  if ($method eq "cgipost") {
    print "\n     POST $host\n";
    print "     $additional\n";
  }

  if ($method eq "cgi") {
    print "\n     $host\n";
  }

  if ($method eq "wwwgrep") {
    print "\n     hmm. not sure.\n";
  }

  print "  OTOH offical handles should be recognised directly.\n";
  print "  Please report errors or misfits via the debian bug tracking system.\n";
}

if($ARGV[0]) {
  $_ = join(' ',@ARGV);
} else {
  $_ = <>;
  chomp;
}
&main($_);
