/**
 * Fetch a URL with an HTTP GET request and return the response body.
 *
 * @param string            $url              URL (or any other fopen()-compatible stream path) to read.
 * @param string|array|null $optional_headers Extra request headers, passed through as the 'header'
 *                                            option of the HTTP stream context; omitted when null.
 *
 * @return string The full response body.
 *
 * @throws Exception When the stream cannot be opened or its contents cannot be read.
 */
function do_get_request($url, $optional_headers = null)
{
    $params = array('http' => array(
        // This helper previously sent 'POST' here, which contradicted its name.
        'method' => 'GET',
    ));
    if ($optional_headers !== null) {
        $params['http']['header'] = $optional_headers;
    }
    $ctx = stream_context_create($params);

    // $php_errormsg no longer exists in PHP 8; error_get_last() still records
    // warnings that were silenced with the @ operator.
    $describeLastError = function () {
        $error = error_get_last();
        return $error === null ? 'unknown error' : $error['message'];
    };

    // Clear first so a stale, unrelated error is never reported as ours.
    error_clear_last();
    $fp = @fopen($url, 'rb', false, $ctx);
    if (!$fp) {
        throw new Exception("Problem with $url, " . $describeLastError());
    }

    try {
        error_clear_last();
        $response = @stream_get_contents($fp);
        if ($response === false) {
            throw new Exception("Problem reading data from $url, " . $describeLastError());
        }
        return $response;
    } finally {
        // Release the stream on both the success and the failure path.
        fclose($fp);
    }
}
/**
 * Send an HTTP POST request with the given body and return the response body.
 *
 * @param string            $url              URL (or any other fopen()-compatible stream path) to open.
 * @param string            $data             Request body, passed as the 'content' option of the HTTP
 *                                            stream context (e.g. "a=1&b=2").
 * @param string|array|null $optional_headers Extra request headers, passed through as the 'header'
 *                                            option of the HTTP stream context; omitted when null.
 *
 * @return string The full response body.
 *
 * @throws Exception When the stream cannot be opened or its contents cannot be read.
 */
function do_post_request($url, $data, $optional_headers = null)
{
    $params = array('http' => array(
        'method' => 'POST',
        'content' => $data,
    ));
    if ($optional_headers !== null) {
        $params['http']['header'] = $optional_headers;
    }
    $ctx = stream_context_create($params);

    // $php_errormsg no longer exists in PHP 8; error_get_last() still records
    // warnings that were silenced with the @ operator.
    $describeLastError = function () {
        $error = error_get_last();
        return $error === null ? 'unknown error' : $error['message'];
    };

    // Clear first so a stale, unrelated error is never reported as ours.
    error_clear_last();
    $fp = @fopen($url, 'rb', false, $ctx);
    if (!$fp) {
        throw new Exception("Problem with $url, " . $describeLastError());
    }

    try {
        error_clear_last();
        $response = @stream_get_contents($fp);
        if ($response === false) {
            throw new Exception("Problem reading data from $url, " . $describeLastError());
        }
        return $response;
    } finally {
        // Release the stream on both the success and the failure path.
        fclose($fp);
    }
}
// Usage demo: fetch the page once via each helper and print both bodies.
$getBody = do_get_request("http://google.com");
echo $getBody;

$postBody = do_post_request("http://google.com", "a=ASDF&b=FDSA&c=FFFF");
echo $postBody;
...
// Fragment: read the contents of $url into $scrape with the built-in file_get_contents().
$scrape = file_get_contents($url);
...
...
// Fragment: the same assignment, this time going through the cURL-based scrape() function.
$scrape = scrape($url);
...
/**
 * Download $url with cURL and hand back whatever curl_exec() produced.
 *
 * @param string $url Address to fetch.
 *
 * @return string|bool The value returned by curl_exec() with CURLOPT_RETURNTRANSFER enabled.
 */
function scrape($url)
{
    $handle = curl_init();
    curl_setopt_array($handle, array(
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => true, // return the body instead of printing it
    ));

    $body = curl_exec($handle);
    curl_close($handle);

    return $body;
}
...
cURL has proxy support, which is handy when scraping Google ("the big G").
<?php
/**
 * Fetches many URLs concurrently with cURL's multi interface, in batches.
 *
 * Usage: $pages = (new curlmulti())->fetch(array('example.com', 'https://example.org'));
 */
class curlmulti
{
    /** @var int Maximum number of URLs downloaded in parallel per batch. */
    public $threads = 20;

    /** @var int Per-transfer timeout (CURLOPT_TIMEOUT). */
    public $timeout = 10; // seconds

    /** @var string User-Agent header sent with every request. */
    public $UA = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.7';

    /** @var array URLs handed to the most recent fetch() call. */
    public $urls = array();

    /** @var array Batches of normalised URLs built by makeThreads(). */
    public $links = array();

    /** @var array Response bodies collected by the most recent fetch() call. */
    public $html = array();

    /**
     * Split $this->urls into batches of at most $this->threads URLs and store
     * them in $this->links, replacing whatever a previous run left there.
     *
     * URLs without a scheme get "http://" prepended. URLs that already carry a
     * scheme (including https://) are left untouched.
     *
     * @return void
     */
    public function makeThreads()
    {
        $normalised = array();
        foreach ($this->urls as $url) {
            // Only look for a scheme at the very start of the URL; searching the
            // whole string for 'http://' would mangle https:// addresses.
            if (!preg_match('#^[a-z][a-z0-9+.\-]*://#i', $url)) {
                $url = "http://" . $url;
            }
            $normalised[] = $url;
        }

        // array_chunk() requires a size of at least 1.
        $batchSize = max(1, (int) $this->threads);
        $this->links = array_chunk($normalised, $batchSize);
    }

    /**
     * Download every URL and return the response bodies in input order.
     *
     * Each call starts from a clean slate: results of earlier calls are not
     * carried over into the returned array.
     *
     * @param array $urls List of URLs; a missing scheme defaults to http://.
     *
     * @return array One entry per URL, as returned by curl_multi_getcontent().
     */
    public function fetch($urls)
    {
        $this->urls = $urls;
        $this->html = array();
        $this->makeThreads();

        foreach ($this->links as $batch) {
            $multi = curl_multi_init();
            $handles = array();

            foreach ($batch as $i => $url) {
                $handles[$i] = curl_init($url);
                curl_setopt($handles[$i], CURLOPT_RETURNTRANSFER, 1);
                curl_setopt($handles[$i], CURLOPT_FOLLOWLOCATION, 1);
                curl_setopt($handles[$i], CURLOPT_USERAGENT, $this->UA);
                curl_setopt($handles[$i], CURLOPT_MAXREDIRS, 4);
                curl_setopt($handles[$i], CURLOPT_TIMEOUT, $this->timeout);
                curl_setopt($handles[$i], CURLOPT_CONNECTTIMEOUT, 5);
                // NOTE(review): certificate verification is switched off, so HTTPS
                // responses are not authenticated. Kept as-is for compatibility.
                curl_setopt($handles[$i], CURLOPT_SSL_VERIFYPEER, 0);
                curl_multi_add_handle($multi, $handles[$i]);
            }

            $working = 0;
            do {
                $status = curl_multi_exec($multi, $working);
                if ($working && $status === CURLM_OK) {
                    // Block until there is socket activity instead of spinning the CPU.
                    if (curl_multi_select($multi, 1.0) === -1) {
                        // select() could not wait on anything; back off briefly.
                        usleep(1000);
                    }
                }
                // Stop on a multi-stack error rather than looping forever.
            } while ($working && ($status === CURLM_OK || $status === CURLM_CALL_MULTI_PERFORM));

            foreach ($handles as $handle) {
                $this->html[] = curl_multi_getcontent($handle);
                curl_multi_remove_handle($multi, $handle);
                curl_close($handle);
            }
            curl_multi_close($multi);
        }

        return $this->html;
    }
}
?>