I think the project is small enough that, if we had 2-4 people, we could crank it out in no time.
Just create an account on Bitbucket, send me a PM with your username, and I'll add you to the project so we can go from there.
PHP got you down? Gotta drop out of the running on this one; work just got super busy, real fast. No time for proxy business these days, sorry!
*****SQL******
-- Recreate the proxy table from scratch; any existing rows are discarded.
DROP TABLE IF EXISTS proxy;

-- One row per proxy entry. The loader script upserts on the primary key,
-- so re-loading the same entry only refreshes its timestamp.
CREATE TABLE proxy (
    proxy     VARCHAR(32) PRIMARY KEY,  -- proxy entry string; unique key
    user      VARCHAR(32),              -- NOTE(review): presumably proxy auth user; not written by the loader
    password  VARCHAR(32),              -- NOTE(review): presumably proxy auth password; not written by the loader
    level     INT(1),                   -- NOTE(review): meaning not shown here; confirm before use
    working   INT(1),                   -- NOTE(review): looks like a 0/1 health flag; confirm
    latency   INT(3),                   -- NOTE(review): unit not shown here; confirm
    timestamp INT(11)                   -- Unix time written by the loader (PHP time())
);
// LOAD DATABASE
// Open the MySQL connection used by the rest of the script. $DB_SERVER,
// $DB_USER, $DB_PASS and $DB_NAME must be defined before this point.
$con = mysql_connect($DB_SERVER, $DB_USER, $DB_PASS);
if (!$con) {
    die('Could not connect: ' . mysql_error());
}

// Stop immediately if the database cannot be selected; otherwise every later
// query would fail with a far less obvious "no database selected" error.
if (!mysql_select_db($DB_NAME, $con)) {
    die('Could not select database: ' . mysql_error($con));
}
/**
 * Upsert a list of proxy entries into the `proxy` table.
 *
 * Each entry is inserted with the current Unix time; if the entry already
 * exists (primary-key collision) only its timestamp is refreshed. Progress
 * and every executed statement are echoed as HTML for debugging. The script
 * dies on the first failing query.
 *
 * @param array    $proxies proxy entries, one string per element
 * @param resource $con     open MySQL link (from mysql_connect())
 * @return array the $proxies array exactly as it was passed in
 */
function loadproxies($proxies, $con)
{
    echo 'Number of proxies in list: ' . count($proxies) . '<br />';

    // One timestamp for the whole batch so all rows of a load share it.
    $timestamp = time();

    foreach ($proxies as $row) {
        // Lines read with file() still carry their trailing newline; strip
        // it so the stored key is the bare entry, and ignore blank lines.
        $proxy = trim($row);
        if ($proxy === '') {
            continue;
        }

        // The value comes from an external file, so escape it before it is
        // placed inside the quoted SQL literal.
        $escaped = mysql_real_escape_string($proxy, $con);

        $sql = "INSERT INTO proxy (proxy, timestamp)VALUES
('$escaped','$timestamp')
ON DUPLICATE KEY UPDATE timestamp = $timestamp";
        echo "<br />" . $sql;

        if (!mysql_query($sql, $con)) {
            die('Error: ' . mysql_error());
        }
    }

    return $proxies; //return an array of proxies
}
// Read the proxy list, one entry per line. The flags drop the trailing
// newline from every line and skip blank lines, so entries reach the
// database (and later cURL) without stray whitespace.
$proxies = file('proxies.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if ($proxies === false) {
    die('Could not read proxies.txt');
}
$proxies = loadproxies($proxies, $con); //$proxies will be a returned array of proxies

// Re-read the complete list from the database so entries stored by earlier
// runs are candidates too, not just the ones loaded from the file above.
$sql = "select proxy from proxy";
$results = mysql_query($sql, $con) or die(mysql_error());
$proxies = array();
while ($row = mysql_fetch_assoc($results)) {
    $proxies[] = $row['proxy'];
}

// SELECT A RANDOM PROXY
// Without this guard an empty table would make $proxies[0] an undefined offset.
if (count($proxies) === 0) {
    die('No proxies available');
}
// shuffle() seeds the random number generator by itself, so no manual
// srand() call is needed.
shuffle($proxies);
$proxy = $proxies[0];
echo "Proxy: " . $proxy . "<br />";
/**
 * Download a URL through a proxy using cURL (GET request).
 *
 * Only the proxy address is configured; proxy type, port and tunnelling
 * are not set here, so whatever cURL defaults to applies.
 *
 * @param string $proxy   value for CURLOPT_PROXY
 * @param string $url     address to fetch
 * @param string $referer value for the Referer header
 * @param string $agent   value for the User-Agent header
 * @param mixed  $header  passed to CURLOPT_HEADER (truthy includes response headers in the result)
 * @param int    $timeout passed to CURLOPT_CONNECTTIMEOUT (seconds)
 * @return mixed whatever curl_exec() returns: the response text, or false on failure
 */
function getPage($proxy, $url, $referer, $agent, $header, $timeout) {
    // Options are applied one by one, in this order.
    $settings = array(
        CURLOPT_URL            => $url,
        CURLOPT_VERBOSE        => 1,
        CURLOPT_HEADER         => $header,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_POST           => 0,
        CURLOPT_PROXY          => $proxy,
        CURLOPT_CONNECTTIMEOUT => $timeout,
        CURLOPT_REFERER        => $referer,
        CURLOPT_USERAGENT      => $agent,
    );

    $handle = curl_init();
    foreach ($settings as $option => $value) {
        curl_setopt($handle, $option, $value);
    }

    $body = curl_exec($handle);
    curl_close($handle);

    return $body;
}
// Fetch the target page through the randomly chosen proxy.
// The URL is a placeholder; it is double-quoted because it contains an
// apostrophe, which would terminate a single-quoted string and cause a
// parse error.
$results = getPage(
    $proxy,
    "http://pageI'mabouttoscrape.com",
    'http://www.somereferrer.com',
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.8',
    1,   // include response headers in the result
    90   // connect timeout in seconds
);

if ($results) {
    //do something with the data
} else {
    // proxy probably failed, try looping around or give up and wait for the next round of cron
    // alternatively update your proxy list to note the failure
}
I haven't really been following this a whole lot, but I may have what you need. What exactly are you looking for? Now, if someone could just implement a pure-PHP proxy finder, we could skip all this manual save-shit-to-text-files stuff, and I'd have a bunch of proxies ready to go in my database all the time without my intervention.
Shoot me a PM if you want me to look at what you've got. I'm not big on having formal assignments, but I may throw some shit together here and there, as I'm working on a project to take over the internets with cron jobs.
<?php
/**
 * Fetches web pages with cURL and extracts "ip:port" proxy entries from them.
 *
 * Typical use: getPage($url), then loadProxies(), then getProxies().
 * Works on plain lists as well as pages where the address and the port sit
 * in separate table cells or divs, because clean() reduces all markup and
 * whitespace to single ':' separators before matching.
 */
class ProxyParser
{
    /** cURL handle shared by every request made through this instance. */
    public $ch;

    /** Cleaned copy of the last page (markup/whitespace reduced to ':' separators). */
    public $data;

    /** Body of the last page exactly as curl_exec() returned it (false on failure). */
    public $origdata;

    /** URL passed to the most recent getPage() call. */
    public $url;

    /** Unique, trimmed "ip:port" strings collected so far. */
    public $proxies = array();

    /**
     * Create the parser and set up its cURL handle.
     *
     * Declared explicitly because PHP 8 no longer treats a method named
     * after the class as the constructor.
     */
    public function __construct()
    {
        $this->init();
    }

    /**
     * PHP 4 style constructor name, kept so existing explicit calls
     * (for example parent::ProxyParser() in a subclass) keep working.
     */
    public function ProxyParser()
    {
        $this->init();
    }

    /** Close the cURL handle. */
    public function close()
    {
        curl_close($this->ch);
    }

    /**
     * Fetch a URL and prepare its content for loadProxies().
     *
     * @param string $url address to download
     * @return mixed the raw curl_exec() result: the page body, or false on failure
     */
    public function getPage($url)
    {
        $this->url = $url;
        curl_setopt($this->ch, CURLOPT_URL, $this->url);
        $this->origdata = curl_exec($this->ch);
        $this->clean();

        return $this->origdata;
    }

    /**
     * @return string the URL passed to the most recent getPage() call
     */
    public function getLastURL()
    {
        return $this->url;
    }

    /**
     * Rebuild $this->data from $this->origdata so that list-style and
     * table/div-separated proxies can be matched by the same pattern.
     *
     * Script and style elements are removed, every remaining tag and every
     * newline, tab or space becomes ':', and runs of ':' are collapsed to
     * one, so "<td>1.2.3.4</td> <td>80</td>" ends up as ":1.2.3.4:80:".
     */
    public function clean()
    {
        // A failed download leaves false in origdata; treat it as empty text.
        $text = (string) $this->origdata;

        $text = preg_replace("/<script(.*?)\/script>/si", "", $text);
        $text = preg_replace("/<style(.*?)\/style>/si", "", $text);
        $text = preg_replace("/<(.*?)>/i", ":", $text);
        $text = str_replace("\r", "", $text);
        $text = str_replace(array("\n", "\t", " "), ":", $text);

        // Collapse every run of separators in a single pass. The original
        // loop never ran because its strpos() arguments were swapped.
        $text = preg_replace("/:{2,}/", ":", $text);

        // Trailing separator so a match at the very end is still delimited.
        $this->data = $text . ":";
    }

    /**
     * Add one proxy or a list of proxies, ignoring duplicates.
     *
     * Entries are trimmed before they are compared and stored; entries that
     * are empty after trimming are skipped. Anything that is neither a
     * string nor an array is ignored.
     *
     * @param mixed $arr a single "ip:port" string or an array of them
     */
    public function addProxies($arr)
    {
        if (is_string($arr)) {
            $arr = array($arr);
        } elseif (!is_array($arr)) {
            return;
        }

        foreach ($arr as $candidate) {
            $candidate = trim($candidate);
            if ($candidate === '') {
                continue;
            }
            if (!in_array($candidate, $this->proxies, true)) {
                $this->proxies[] = $candidate;
            }
        }
    }

    /**
     * @return array the proxies collected so far
     */
    public function getProxies()
    {
        return $this->proxies;
    }

    /** Forget every proxy collected so far. */
    public function clearProxies()
    {
        $this->proxies = array();
    }

    /**
     * Find "ip:port" entries in the cleaned page data and add them to the list.
     *
     * @return bool true when at least one entry was found, false otherwise
     */
    public function loadProxies()
    {
        // One IPv4 octet, 0-255.
        $octet = '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)';
        $pattern = '/' . implode('\.', array_fill(0, 4, $octet)) . ':(\d{1,6})/si';

        // preg_match_all() yields 0 for no matches and false on error.
        if (!preg_match_all($pattern, (string) $this->data, $out)) {
            return false;
        }

        $this->addProxies($out[0]);

        return true;
    }

    /**
     * Create the cURL handle and apply the options shared by all requests.
     */
    public function init()
    {
        $this->ch = curl_init();
        $useragent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1";
        curl_setopt($this->ch, CURLOPT_USERAGENT, $useragent);
        // NOTE(review): TLS certificate verification is switched off, so
        // HTTPS responses are not authenticated. Do not reuse this handle
        // for anything sensitive.
        curl_setopt($this->ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, 1);
        // Track the outgoing request headers so curl_getinfo() can report them.
        curl_setopt($this->ch, CURLINFO_HEADER_OUT, true);
        curl_setopt($this->ch, CURLOPT_FOLLOWLOCATION, 1);
        // Response headers are left out of the returned body.
        curl_setopt($this->ch, CURLOPT_HEADER, 0);
        curl_setopt($this->ch, CURLOPT_AUTOREFERER, 0);
        curl_setopt($this->ch, CURLOPT_REFERER, "");
    }
}
?>