#! /usr/bin/perl -w
#
# CGI script: look up the title of a book from its ISBN-10 or "978" EAN-13
# by querying a list of web sites in turn, print the result as HTML and
# append it to books.log.

use strict;
use FileHandle;
use CGI;

# Compute a check digit using alternating weights 1,3,1,3,... over the
# characters of $string (weight 1 on the first character).
#
# $string - the digits preceding the check digit (e.g. the first 12 digits
#           of an EAN-13).
# Returns the check digit 0..9.
sub eancheckdigit {
    my ($string) = @_;

    my $weight = 1;
    my $sum    = 0;
    foreach my $digit (split('', $string)) {
        $sum += $digit * $weight;
        $weight = 4 - $weight;    # toggles 1 -> 3 -> 1 -> ...
    }
    return (10 - ($sum % 10)) % 10;
}

# Compute a modulo-11 check digit using weights 10,9,8,... over the
# characters of $string.
#
# $string - the digits preceding the check digit (the first 9 digits of an
#           ISBN-10).
# Returns 0..9, or the string 'X' when the check value is 10.
sub isbncheckdigit {
    my ($string) = @_;

    my $weight = 10;
    my $sum    = 0;
    foreach my $digit (split('', $string)) {
        $sum += $digit * $weight;
        $weight--;
    }

    my $check = (11 - ($sum % 11)) % 11;
    return $check == 10 ? 'X' : $check;
}

# Insert hyphens into an unhyphenated ISBN-10: group-publisher-item-check.
#
# $isbn - ISBN-10 without separators (lookup() always passes 10 characters).
# Returns the hyphenated string.  If the group found for $isbn has no entry
# in the publisher table, $isbn is returned unchanged.
#
# Both tables list range *starts*; the number of characters in a start value
# is the number of characters the group / publisher part has from that value
# on.  Entries are ordered by string comparison, not numerically, which is
# why all comparisons below use cmp.
sub isbnhyphenate {
    my ($isbn) = @_;

    # source: http://usin.org/software/servers/ISBN-ISSN.phps
    my @country_group_partition = ( 0, 80, 950, 9960, 99900 );
    my %country_group_map = (
        0 => [ '00',200,7000,85000,900000,9500000 ],
        1 => [ '00000000',55000,869800,9999900 ],
        2 => [ '00',200,40000000,500,7000,84000,900000,9500000 ],
        3 => [ '00',200,7000,85000,900000,9500000 ],
        4 => ['00',200,7000,85000,900000,9500000],
        5 => ['00',200,7000,85000,900000,9500000],
        7 => ['00',100,5000,80000,900000],
        80 => ['00',200,7000,85000,900000],
        81 => ['00',200,7000,85000,900000],
        82 => ['00',200,7000,90000,990000],
        83 => ['00',200,7000,85000,900000],
        84 => ['00',200,7000,85000,900000,95000,9700],
        85 => ['00',200,7000,85000,900000],
        86 => ['00',300,7000,80000,900000],
        87 => ['00',400,7000,85000,970000],
        88 => ['00',200,7000,85000,900000],
        89 => ['00',300,7000,85000,950000],
        90 => ['00',200,5000,70000,800000,9000000],
        91 => ['0',20,500,6500000,7000,8000000,85000,9500000,970000],
        92 => ['0',60,800,9000],
        93 => ['0000000'],
        950 => ['00',500,9000,99000],
        951 => ['0',20,550,8900,95000],
        952 => ['00',200,5000,89,9500,99000],
        953 => ['0',10,150,6000,96000],
        954 => ['00',400,8000,90000],
        955 => ['0',20,550,800000,9000,95000],
        956 => ['00',200,7000],
        957 => ['00',440,8500,97000],
        958 => ['0',600,9000,95000],
        959 => ['00',200,7000],
        960 => ['00',200,7000,85000],
        961 => ['00',200,6000,90000],
        962 => ['00',200,7000,85000],
        963 => ['00',200,7000,85000],
        964 => ['00',300,5500,90000],
        965 => ['00',200,7000,90000],
        966 => ['00',500,7000,90000],
        967 => ['0',60,900,9900,99900],
        968 => ['000000',10,400,500000,6000,800,900000],
        969 => ['0',20,400,8000],
        970 => ['00',600,9000,91000],
        971 => ['00',500,8500,91000],
        972 => ['0',20,550,8000,95000],
        973 => ['0',20,550,9000,95000],
        974 => ['00',200,7000,85000,900000],
        975 => ['00',300,6000,92000,980000],
        976 => ['0',40,600,8000,95000],
        977 => ['00',200,5000,70000],
        978 => ['000',2000,30000],
        979 => ['0',20,300000,400,700000,8000,95000],
        980 => ['00',200,6000],
        981 => ['00',200,3000],
        982 => ['00',100,500000],
        983 => ['000',2000,300000,50,800,9000,99000],
        984 => ['00',400,8000,90000],
        985 => ['00',400,6000,90000],
        986 => ['000000'],
        987 => ['00',500,9000,99000],
        9952 => ['00000'],
        9953 => ['0',20,9000],
        9954 => ['00',8000],
        9955 => ['00',400],
        9956 => ['00000'],
        9957 => ['00',8000],
        9958 => ['0',10,500,7000,9000],
        9959 => ['00'],
        9960 => ['00',600,9000],
        9961 => ['0',50,800,9500],
        9962 => ['00000'],
        9963 => ['0',30,550,7500],
        9964 => ['0',70,950],
        9965 => ['00',400,9000],
        9966 => ['00',70000,800,9600],
        9967 => ['00000'],
        9968 => ['0',10,700,9700],
        9970 => ['00',400,9000],
        9971 => ['0',60,900,9900],
        9972 => ['0',40,600,9000],
        9973 => ['0',10,700,9700],
        9974 => ['0',30,550,7500],
        9975 => ['0',50,900,9500],
        9976 => ['0',60,900,99000,9990],
        9977 => ['00',900,9900],
        9978 => ['00',950,9900],
        9979 => ['0',50,800,9000],
        9980 => ['0',40,900,9900],
        9981 => ['0',20,800,9500],
        9982 => ['00',40000,800,9900],
        9983 => ['00',500,80,950,9900],
        9984 => ['00',500,9000],
        9985 => ['0',50,800,9000],
        9986 => ['00',400,9000],
        9987 => ['00',400,8800],
        9988 => ['0',30,550,7500],
        9989 => ['0',30,600,9700],
        99901 => ['00'],
        99903 => ['0',20,900],
        99904 => ['0',60,900],
        99905 => ['0',60,900],
        99906 => ['0',60,900],
        99908 => ['0',10,900],
        99909 => ['0',40,950],
        99910 => ['0000'],
        99911 => ['00',600],
        99912 => ['0',60,900],
        99913 => ['0',30,600],
        99914 => ['0',50,900],
        99915 => ['0',50,800],
        99916 => ['0',30,700],
        99917 => ['0',30],
        99918 => ['0',40,900],
        99919 => ['0',40,900],
        99920 => ['0',50,900],
        99921 => ['0',20,700],
        99922 => ['0',50],
        99923 => ['0',20,800],
        99924 => ['0',30],
        99925 => ['0',40,800],
        99926 => ['0000',600],
        99927 => ['0',30,600],
        99928 => ['0',50,800],
        99929 => ['0000'],
        99930 => ['0',50,800],
        99931 => ['0000'],
        99932 => ['0',10],
        99933 => ['00',300],
        99934 => ['0'],
        99935 => ['0000'],
        99936 => ['0000'],
        99937 => ['0',20]
    );

    # determine country group
    #
    # Walk the range starts in order and keep the longest leading substring
    # whose range start is <= the ISBN's leading characters.  The comparison
    # is inclusive: a value equal to a range start belongs to that range.
    # (A strict "<" here resolved e.g. "80..." to the non-existent group "8".)
    my $group = substr($isbn, 0, length($country_group_partition[0]));
    for my $start (@country_group_partition[1 .. $#country_group_partition]) {
        my $candidate = substr($isbn, 0, length($start));
        last if ($start cmp $candidate) > 0;
        $group = $candidate;
    }

    # No publisher table for this group: leave the ISBN unhyphenated rather
    # than producing a string with an empty publisher part.
    my $ranges = $country_group_map{$group};
    return $isbn unless defined $ranges;

    # determine publisher prefix (same inclusive walk as for the group)
    my $prefix = substr($isbn, length($group), length($ranges->[0]));
    for my $start (@{$ranges}[1 .. $#{$ranges}]) {
        my $candidate = substr($isbn, length($group), length($start));
        last if ($start cmp $candidate) > 0;
        $prefix = $candidate;
    }

    # Whatever remains, minus the final check character, is the item number.
    my $itemstart  = length($group) + length($prefix);
    my $itemlength = length($isbn) - $itemstart - 1;

    return join('-',
        $group,
        $prefix,
        substr($isbn, $itemstart, $itemlength),
        substr($isbn, $itemstart + $itemlength, 1));
}

# Fetch $url and return the response body.
#
# Tries LWP::UserAgent first (30 second timeout, proxy settings taken from
# the environment).  If that request is not successful, falls back to running
# /usr/local/bin/GET with the URL as its only argument.
#
# Returns the body as a string, or '' if both attempts produce nothing.
sub retrieve {
    my ($url) = @_;

    require LWP::UserAgent;
    require HTTP::Request;

    my $ua = LWP::UserAgent->new(
        timeout    => 30,
        env_proxy  => 1,
        agent      => 'Mozilla/4.0; compatible; MSIE 6.0; perl bot',
        keep_alive => 1,
    );

    my $request = HTTP::Request->new(GET => $url);
    $request->protocol('HTTP/1.1');

    my $response = $ua->request($request);
    return $response->content if $response->is_success;

    # Fallback: external GET program.  List-form pipe open passes the URL as
    # a single argument without involving a shell, so no characters in the
    # URL need escaping and none can be interpreted as shell syntax.
    my $content = '';
    if (open(my $pipe, '-|', '/usr/local/bin/GET', $url)) {
        local $/;    # slurp the whole output
        my $output = <$pipe>;
        close($pipe);
        $content = $output if defined $output;
    }
    return $content;
}

# Decide whether a string scraped from a result page is one of the known
# "nothing found" / boilerplate titles rather than a real book title.
#
# $code - the scraped text.
# Returns 1 if it matches a known error/boilerplate text, 0 otherwise.
sub IsErrorCode {
    my ($code) = @_;

    my @error_patterns = (
        qr/^\s*Not found\.?\s*$/i,
        qr/^\s*Book Search\s*$/i,
        qr/^\s*Advanced Book Search\s*$/i,
        qr/^\s*\*\*\* Not found\. \*\*\*?\s*$/i,
        qr/^\s*No Title Found\s*$/i,
        qr/^\s*Books.+Used.+Out of Print.+DVDs.+Toys\s*$/i,
        qr/^\s*by\s*$/i,
    );

    foreach my $pattern (@error_patterns) {
        return 1 if $code =~ $pattern;
    }
    return 0;
}

# Convert a 13-digit code starting with "978" to the equivalent ISBN-10.
#
# $isbn - candidate code (digits, possibly with a trailing x/X).
# Returns the 10-character ISBN for a "978" 13-digit input; any other input
# is returned unchanged.
sub forceISBN {
    my ($isbn) = @_;

    # Anchored so that only a string that *is* a 978 EAN-13 is converted,
    # not one that merely contains such a digit run somewhere inside it.
    if ($isbn =~ /^978([0-9]{9})[0-9]$/) {
        # todo: compare eancheckdigit('978'.$1) to the final digit, bail if not equal
        my $body = $1;
        $isbn = $body . isbncheckdigit($body);
    }
    return $isbn;
}

# Look up the title for an ISBN / EAN by scraping a list of web sites.
#
# $isbn - ISBN-10 or "978" EAN-13, digits (and x/X) only.
# Returns, as an HTML fragment, the first scraped title that is not a known
# error text followed by a link to the page it came from; "Not a valid ISBN."
# if the input is not 10 characters of the expected form; or
# "*** Not found. ***" if no source yields a title.
sub lookup {
    my ($isbn) = @_;

    $isbn = forceISBN($isbn);
    $isbn =~ /^([0-9]{9})([0-9xX])$/ or return "Not a valid ISBN.";
    # todo: compare isbncheckdigit($1) to $2, bail if not equal
    $isbn = $1 . $2;

    # Sources are tried in order; capture group 1 of each pattern is the title.
    #
    # NOTE(review): this list originally also contained, in this order,
    #   http://www.amazon.co.uk/exec/obidos/ASIN/<isbn>  (first; its link was
    #                                                     labelled "buy this book")
    #   http://isbn.nu/<isbn>                            (second; pattern ended
    #                                                     in ...]+>(.+?)<\/font>)
    #   http://www.amazon.com/exec/obidos/ASIN/<isbn>    (after ozon.ru)
    # Their title patterns are missing or truncated in the copy of the script
    # this was restored from, so they are left out here rather than guessed.
    # Restore them from an intact copy.
    my @sources = (
        {
            url     => 'http://www.ozon.ru/?context=advsearch_book&isbn=' . isbnhyphenate($isbn),
            pattern => qr/class="big1">(.+?)<\//,
        },
        # {
        #     url     => 'http://www.amazon.co.jp/exec/obidos/ASIN/' . $isbn,
        #     pattern => (missing),
        # },
        {
            url     => 'http://www.biblio.com/isbn/' . $isbn . '.html',
            pattern => qr/Biblio: \(ISBN: .+?\) (.+?)<\//,
        },
        {
            # NOTE(review): the "\ \;\&\;\ \;" run looks like HTML entities
            # that were decoded at some point -- confirm against the live page.
            url     => 'http://search.barnesandnoble.com/booksearch/isbninquiry.asp?ISBN=' . $isbn,
            pattern => qr/<title>Barnes\ \;\&\;\ \;Noble.com - (.+?)<\//,
        },
        {
            url     => 'http://my.linkbaton.com/isbn/' . $isbn,
            pattern => qr/content='ISBN, book, author, (.+?, )[0-9xX]+, (.+?)'/,
        },
        {
            url     => 'http://www.google.co.uk/search?btnI=1&q=inurl:ffbooks+' . $isbn,
            pattern => qr/<title>(.+?by.+?)<\//,
        },
        {
            url     => 'http://wuz.librialice.it/scheda.aspx?isbn=' . $isbn,
            pattern => qr/<title>(.+? - .+?) *<\//,
        },
        # {
        #     url     => 'http://www.centraldellibro.com/web/ES/APL/resultados_busqueda.asp?cad_busq=' . $isbn,
        #     pattern => qr/<title>[^<]+?: ([<]+) *<\/.+<meta name/,
        # },
    );

    foreach my $source (@sources) {
        my $html = retrieve($source->{url});
        next unless defined $html;
        next unless $html =~ $source->{pattern};

        my $title = $1;
        next if IsErrorCode($title);

        return $title . ' (<a href="' . $source->{url} . '">source</a>)';
    }

    return "*** Not found. ***";
}

# Entry point.  The constant condition selects CGI mode; change it to 0 to
# run a single lookup for the first command-line argument instead.
if (1) {
    my $q = CGI->new;
    print $q->header, $q->start_html;

    my $code = $q->param('code');
    if (defined($code)) {
        # Keep only characters that can occur in an ISBN / EAN.
        $code =~ s/[^0-9xX]//g;

        my $result = '';
        length($code) and $result = lookup($code);

        print <<END;
<!--
If you are screen-scraping the output of this code, please consider
contacting the author for a copy of the latest version. He will be happy
to provide you with Perl source to run locally free of charge.
moonshadow (at) toothycat.net
-->
END

        if (length($code)) {
            print $code . ': <b><div class="data">' . $result . "</div></b><br>\n";
            print 'Click <A HREF="barcode.pl?isbn=' . $code . '">here</A> to generate a barcode for this ISBN.<br>';
        }

        if (length $result) {
            # Append the lookup to the log.  A log that cannot be opened must
            # not abort the page that is already being sent to the browser.
            my $fh = FileHandle->new('books.log', '>>');
            if (defined $fh) {
                print $fh '<div class="entry">'
                    . '<span class="entry">'
                    . '<a href="/cgi/book.pl?code=' . $code . '">'
                    . $code . '</a></span>'
                    . '<span class="entry">' . $result . '</span>'
                    . '</div>' . "\n";
                $fh->close();
            }
            else {
                warn "cannot append to books.log: $!";
            }
        }
    }

    print '<br><hr>';
    print '<br>Type an ISBN or EAN and press enter to look it up: ';
    print $q->start_form(-method => 'GET');
    print $q->textfield('code', '', 10, 13);
    print <<'AD';
<hr>
<P ALIGN="right">
<script type="text/javascript"><!--
google_ad_client = "pub-3160732673995309";
google_ad_width = 234;
google_ad_height = 60;
google_ad_format = "234x60_as";
google_ad_type = "text";
google_ad_channel ="";
google_color_border = "EEEEEE";
google_color_bg = "FFFFFF";
google_color_link = "AAAAAA";
google_color_url = "CCCCCC";
google_color_text = "999999";
//--></script>
<script type="text/javascript"
  src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
</script>
</P>
AD
    print $q->end_form;
    print $q->end_html;
}
else {
    print $ARGV[0] . ': ' . lookup($ARGV[0]) . "\n";
}