#!/usr/bin/perl -X
## Run as: perl /home/xxxxxxx/public_html/kwscrape/pack.cgi
# Send a plain-text e-mail by piping a message to the local sendmail binary.
#
# Arguments (positional):
#   1. recipient address, written to the To: header
#   2. subject line
#   3. message body
#
# sendmail is started with "-oi -t -odq"; with -t it takes the recipient
# from the headers written below. The sender address is fixed.
#
# Returns 1 when the pipe was opened and closed cleanly, 0 otherwise.
sub sendemail
{
    my ($to, $subject, $body) = @_;
    my $from = 'xxxxxxxx@gmail.com';

    # Header values must stay on a single line: an embedded CR/LF in a
    # caller-supplied value would otherwise start a new header or end the
    # header section early.
    for my $header_value ($to, $subject) {
        $header_value = '' unless defined $header_value;
        $header_value =~ s/[\r\n]+/ /g;
    }
    $body = '' unless defined $body;

    # List-form pipe open: the command is run directly, without a shell.
    open(my $mail, '|-', '/usr/sbin/sendmail', '-oi', '-t', '-odq')
        or return 0;

    print {$mail} "From: $from\n";
    print {$mail} "To: $to\n";
    print {$mail} "Content-type: text/plain;charset=windows-1251\n";
    print {$mail} "Subject: $subject\n";
    print {$mail} "\n";
    print {$mail} $body;

    # For a pipe, close() waits for the child and reports its failure.
    return close($mail) ? 1 : 0;
}
# Record one fetch failure and send an alert once failures have piled up.
#
# The running count is stored in scrape_error.log in the current working
# directory. If the stored count has reached 10, a notification is sent
# through sendemail() and the count restarts from zero. The incremented
# count is then written back to the file.
#
# Takes no arguments and returns nothing useful.
sub got_error {
    my $counter_file = 'scrape_error.log';
    my $failures     = 0;

    if (open(my $in, '<', $counter_file)) {
        my $stored = <$in>;
        close($in);
        # An empty or non-numeric file is treated as a fresh counter.
        if (defined $stored && $stored =~ /\A\s*(\d+)/) {
            $failures = 0 + $1;
        }
    }

    # ">=" rather than "==": a stored value that somehow overshoots 10
    # still raises the alert instead of growing forever unnoticed.
    if ($failures >= 10) {
        sendemail('xxxxxxxxx@gmail.com', 'Fetching Error', "More then 10 times fetching error!");
        $failures = 0;
    }
    $failures++;

    if (open(my $out, '>', $counter_file)) {
        print {$out} $failures;
        close($out) or warn "Cannot close $counter_file: $!\n";
    }
    else {
        # Without this the counter would silently stop advancing.
        warn "Cannot write $counter_file: $!\n";
    }
    return;
}
use LWP::UserAgent;
use HTTP::Request::Common;
# ---- Main script ---------------------------------------------------------
# One scrape pass: honour the stop/pack flag files, fetch the search-spy XML
# feed, append the extracted terms to the log, then start xmlscrape.cgi.

# Change to the working directory before touching any relative path, so the
# flag files, the log and the helper scripts resolve the same way no matter
# which directory the script was started from.
chdir("/home/xxxxxxx/public_html/kwscrape/")
    or die "Cannot chdir to working directory: $!\n";

my $fwrite = "log.txt";

# stop.flg: consume the flag and end this run without launching anything.
if (-e "stop.flg") {
    unlink("./stop.flg");
    die "stop.flg found; stopping.\n";
}

# pack.flg: consume the flag, hand the current log over as pack.txt and
# start arch.cgi in the background. Otherwise pause before fetching.
if (-e "pack.flg") {
    unlink("./pack.flg");
    system("mv", "-f", $fwrite, "pack.txt");    # list form: no shell involved
    system("./arch.cgi &");                     # shell needed for "&"
}
else {
    sleep(3);
}

#die;
#print "Content-type: text/html; charset=windows-1251\n\n";
#print "Started!";

my $ua = LWP::UserAgent->new;
$ua->agent('Mozilla/5.0');
my $url = 'http://www.dogpile.com/info.dogpl/searchspy/inc/data.xml?filter=0';

# Scheme+host and directory forms of the URL. Nothing in the visible code
# reads them; left as package globals in case other code does.
($absurl) = $url =~ m!(http://.+?)/!si;
($otnurl) = $url =~ m!(http://.+)/!si;

my $re = $ua->request(GET $url);
if ($re->is_success) {
    my $mainresponse = $re->as_string;

    # No /s here, so this captures from the first "<" to the last ">" on
    # the first line that contains both.
    my ($cont) = $mainresponse =~ m!(<.*>)!;

    my @words;
    if (defined $cont) {
        $cont =~ s!<.*?>!#!sig;      # every tag becomes a "#" separator
        $cont =~ s!#+!#!sig;         # collapse runs of separators
        $cont =~ s!&.*?;!!sig;       # drop "&...;" entities
        ($cont) = $cont =~ m!#(.*)#!si;    # trim the outer separators
        @words = split("#", $cont) if defined $cont;
    }

    if (open(my $log, '>>', $fwrite)) {
        print {$log} "$_\n" for @words;
        close($log) or warn "Cannot close $fwrite: $!\n";
    }
    else {
        warn "Cannot append to $fwrite: $!\n";
    }
}
else {
    # A failed fetch is counted and reported, and its response is NOT
    # parsed: an error page must not end up in the word log.
    print "Error at getting $url!\n";
    got_error();
}

# Start xmlscrape.cgi in the background, whether or not the fetch worked.
system("./xmlscrape.cgi &");