Curl script assistance

Status
Not open for further replies.

msm365

New member
Oct 27, 2008
11
0
0
Hi coders,

Im busy struggling with a curl script and am pulling my hair out at the mo.

All I want to do is to login into answers.yahoo.com, with
the provision of a username and password.

Currently the script seems to post, but comes up with a secondary screen
asking for the password again! Username is filled in (probably something to do with cookies)
And Im struggling at this point to re-curl in with the password...

I hope the following below request is allowed:

Anyone willing to look at fixing/developing this script for me
Willing to pay up to $50 by paypal
Needs to be very commented code - cos Im struggling to find where
Im going wrong - and so I can understand the code as well

Oh - needs to be PHP but thats self explanatory

Thanks
 


Can I have a look at the script you have made till now? Plus is your aim retrieving data or posting data?
 
Fuck your PHP

Code:
import pycurl
from StringIO import StringIO
import urllib

def setup_curl(url):
    c = pycurl.Curl()
    c.setopt(pycurl.USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)")
    c.setopt(pycurl.HTTPHEADER, ["Accept-Language: en-us,en;q=0.5", "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7", "Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"])
    c.setopt(pycurl.URL, url)
    c.setopt(pycurl.FOLLOWLOCATION, 1)
    c.setopt(pycurl.MAXREDIRS, 10)
    c.setopt(pycurl.COOKIEFILE, '')

    return c

def login(login, passwd):
    c = setup_curl('https://login.yahoo.com/config/login')
    resp = StringIO()
    c.setopt(pycurl.WRITEFUNCTION, resp.write)
    c.setopt(pycurl.POST, 1)

    args = urllib.urlencode({
        'login': login,
        'passwd': passwd
    })

    c.setopt(pycurl.POSTFIELDS, args)
    try: c.perform()
    except: return None

    resp = StringIO()
    c.setopt(pycurl.WRITEFUNCTION, resp.write)
    c.setopt(pycurl.URL, 'http://answers.yahoo.com')
    try: c.perform()
    except: return None

    return resp.getvalue()

if __name__ == '__main__':
    resp = login('username@yahoo.com', 'passwd')
    f = file('out.html', 'w')
    f.write(resp)
    f.close()
 
I give you guys _working_ code and you're stuck with your heads up your asses. Beautiful.
 
Code

David - thanks for the code - yeah we still got our head up our asses!
Me personally, I need something in php - yeah i know F$%k php

anyways herewith what ive got:

Code:
<?php


function curl_get_file_contents($URL)
    {
        $c = curl_init();
        curl_setopt($c, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($c, CURLOPT_URL, $URL);
        $contents = curl_exec($c);
        curl_close($c);

        if ($contents) return $contents;
            else return FALSE;
    }

$url = 'http://my.yahoo.com';

$site = 'https://login.yahoo.com/config/login?.done=http://answers.yahoo.com%2findex&.src=knowsrch&.intl=us';
$sitetext = curl_get_file_contents($site);
//echo $sitetext;

$posturl = 'https://login.yahoo.com/config/login?';

// Get the hidden fields
        
$pattern = '<input type="hidden" name=".tries" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
//print_r($matches);
$tries = $matches[1];

$pattern = '<input type="hidden" name=".src" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$src = $matches[1];

$pattern = '<input type="hidden" name=".md5" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$md5 = $matches[1];


$pattern = '<input type="hidden" name=".hash" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$hash = $matches[1];


$pattern = '<input type="hidden" name=".js" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$js = $matches[1];


$pattern = '<input type="hidden" name=".last" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$last = $matches[1];


$pattern = '<input type="hidden" name=".promo" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$promo = $matches[1];

$pattern = '<input type="hidden" name=".intl" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$intl = $matches[1];

$pattern = '<input type="hidden" name=".bypass" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$bypass = $matches[1];


$pattern = '<input type="hidden" name=".partner" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$partner = $matches[1];

$pattern = '<input type="hidden" name=".u" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$u = $matches[1];

$pattern = '<input type="hidden" name=".v" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$v = $matches[1];


$pattern = '<input type="hidden" name=".challenge" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$challenge = $matches[1];

$pattern = '<input type="hidden" name=".yplus" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$yplus = $matches[1];


$pattern = '<input type="hidden" name=".emailCode" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$emailcode = $matches[1];


$pattern = '<input type="hidden" name=".pkg" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$pkg = $matches[1];

$pattern = '<input type="hidden" name=".stepid" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$stepid = $matches[1];

$pattern = '<input type="hidden" name=".ev" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$ev = $matches[1];


$pattern = '<input type="hidden" name=".hasMsgr" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$hasmsgt = $matches[1];

$pattern = '<input type="hidden" name=".chkP" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$chkp = $matches[1];

$pattern = '<input type="hidden" name=".done" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$done = $matches[1];


$pattern = '<input type="hidden" name=".pd" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$pd = $matches[1];

$pattern = '<input type="hidden" name=".u" value="(.*)">';
preg_match( $pattern, $sitetext, $matches );
$u = $matches[1];

// Get the Post Data

    
        $post_data = array();
        $post_data['login'] = 'usernamehere';
        $post_data['passwd'] = 'passwordhere';
        $post_data['.tries'] = $tries;
        $post_data['.src'] = $src;
        $post_data['.md5'] = $md5;
        $post_data['.hash'] = $hash;
        $post_data['.js'] = $js;
        $post_data['.last'] = $last;
        $post_data['.promo'] = $promo;
        $post_data['.intl'] = $intl;
        $post_data['.bypass'] = $bypass;
        $post_data['.partner'] = $partner;
        $post_data['.u'] = $u;
        $post_data['.v'] = $v;
        $post_data['.challenge'] = $challenge;
        $post_data['.yplus'] = $yplus;
        $post_data['.emailCode'] = $emailcode;
        $post_data['pkg'] = $pkg;
        $post_data['stepid'] = $stepid;
        $post_data['.ev'] = $ev;
        $post_data['hasMsgr'] = $hasmsgr;
        $post_data['.chkP'] = $chkp;
        $post_data['.done'] = $done;
        $post_data['.pd'] = $pd;
        $post_data['.persistent'] = 'n';



        foreach ($post_data as $key => $value) {
        $post_items[] = $key . '=' . $value;
        }
        $post_string = implode ('&', $post_items);
        //echo $post_string;

        $cookie = MD5($post_data['login']);

        $curl_connection = curl_init($posturl);
        curl_setopt($curl_connection, CURLOPT_COOKIEJAR, $cookie);
        curl_setopt($curl_connection, CURLOPT_COOKIEFILE, $cookie );
        curl_setopt($curl_connection, CURLOPT_COOKIE, $cookie);
        //curl_setopt($curl_connection, CURLOPT_CONNECTTIMEOUT, 30);
        curl_setopt($curl_connection, CURLOPT_USERAGENT, "YahooSeeker-Testing/v3.9 (compatible; Mozilla 4.0; MSIE 5.5; http://search.yahoo.com/)");
        curl_setopt($curl_connection, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curl_connection, CURLOPT_AUTOREFERER, false);
        //curl_setopt($curl_connection, CURLOPT_REFERER, $url);
        curl_setopt($curl_connection, CURL_SSL_VERIFYPEER, false);
        //curl_setopt($curl_connection, CURLOPT_FOLLOWLOCATION, 0);

        curl_setopt($curl_connection, CURLOPT_POSTFIELDS, $post_string);

        $result = curl_exec($curl_connection);

echo $result;

// Set landing page options
curl_setopt( $ch, CURLOPT_REFERER, '' );
curl_setopt($ch, CURLOPT_URL, 'https://login.yahoo.com/config/login?');
curl_setopt($ch, CURLOPT_COOKIEFILE, 'cookie.txt' );

curl_setopt( $ch, CURLOPT_POST, FALSE );
curl_setopt( $ch, CURLOPT_POSTFIELDS, NULL );
curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
// Retrieve source code
$result = curl_exec( $ch );
curl_close( $ch );

echo $result;

?>
It aint pretty coding - so shoot me!!:costumed-smiley-015

username and password to be entered.

Quick note - beto helped me out with a command line version - but id still like to get one where it can be launched via the browser

Any assistance greatly appreciated!
 
You should be doing something like this. Your not getting the proper cookies, looks like when you initially log in you don't have cookies enabled yet.

Code:
<?php    
    //file path to cookie.
    //this is a linux path because that's what I use
    //as long as you can write to it, its ok
    $cookie = "/tmp/cookie.txt";  
    
    
    $url = "http://www.domain.com/login.php";
    $url2 = "Another URL";
     
    $agent = "User-Agent";
        
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
    curl_setopt($ch, CURLOPT_COOKIESESSION, TRUE);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
    curl_setopt($ch, CURLOPT_USERAGENT, $agent);
    curl_setopt($ch, CURLOPT_COOKIE, session_name() . '=' . session_id());
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

    //visit login page
    //pick up any cookies that get set 
    //not needed on all sites
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_exec($ch);

    //pass in login stuff
    curl_setopt($ch, CURLOPT_POSTFIELDS, $post);
    curl_exec($ch);
    
    //Go to the page we want to scrape
    curl_setopt($ch, CURLOPT_URL, $url2);
    curl_setopt($ch, CURLOPT_POSTFIELDS, 0);
    //$rawdata holds the pages raw data
    $rawdata = curl_exec($ch);
    
    curl_close($ch);
    
    ?>
 
anyways herewith what ive got:

....

Any assistance greatly appreciated!

Gah! Sorry for the late reply - I completely forgot I'd asked you to post some code and said I'd come back to you.

Anyway, I have some nice code for you that works for what you want. Just fill in your username and password at the bottom.

To add further functions that use the same login cookie, just add more functions to the social_post class, and use "$this->curl" to connect. I added in a "yahoo_check_account" function as an example.

Here's the code:
Code:
<?
/*
* Curl class
*/
class curl_class{
    
    function curl_class(){ $this->__construct($array=null); }
    function __construct($array=null) {
    
        /*
        Fill class with default variables
        */    
        if(is_array($array)) {
            foreach($array AS $key=>$value) {
            $this->$key = $value;
            }
        }    
        
        /*
        Specify cookie file
        */    
        if(!$this->cookie){
            $this->cookie = tempnam("tmp","COOK");
        }
        
        /*
        Default user agent
        */            
        if(!$this->user_agent) {            
            $this->user_agent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)';        
        }

        /*
        Init curl
        */        
        $this->curl = curl_init();
        
        $this->configCurl(CURLOPT_SSL_VERIFYPEER, 0);
        $this->configCurl(CURLOPT_SSL_VERIFYHOST, 0);
        $this->configCurl(CURLOPT_RETURNTRANSFER, 1);
        $this->configCurl(CURLOPT_AUTOREFERER, 1);
        $this->configCurl(CURLOPT_COOKIEFILE, $this->cookie);
        $this->configCurl(CURLOPT_COOKIEJAR, $this->cookie);        
        
        if($this->ajax) {
            $str[]  = "X-Requested-With: XMLHttpRequest";
        }
        
        if($this->prototype) {
            $str[]  = "X-Prototype-Version: ". $this->prototype;
        }        
        
        if($this->prototype || $this->ajax) {        
             $this->configCurl(CURLOPT_HTTPHEADER, $str);
        }
                
        if($this->debug) {
        $GLOBALS['fd'] = fopen($_SERVER['DOCUMENT_ROOT']."/webadmin/log/" . "error.txt", "a+");
        $this->configCurl(CURLOPT_VERBOSE, 1);    
        $this->configCurl(CURLOPT_STDERR, $GLOBALS['fd']);    
        }
        
        /*
        Set timeout/
        */            
        $this->setTimeout($this->connect,$this->transfer);


    }

    /*
    Set a curl option
    */    
    function configCurl($option, $value){
        return @curl_setopt ($this->curl, $option, $value);
    }


    /*
    Set curl timeout
    */    
    function setTimeout($connect=20, $transfer=10) {
    
        $this->configCurl(CURLOPT_CONNECTTIMEOUT, $connect);    
        $this->configCurl(CURLOPT_TIMEOUT, $transfer);    


    }

    /*
    Return curl error
    */    
    function get_error_code() {

        return curl_errno($this->curl) ? curl_error($this->curl) : false;

    }


    /*
    Return page http code
    */    
    function get_response_code() {

        return curl_getinfo($this->curl, CURLINFO_HTTP_CODE);

    }


    /*
    HTTP authentication
    */    
    function auth($user, $pass) {

        $this->authinfo = "$user:$pass";        

    }

    /*
    POST function
    */    
    function post($url, $referer = '', $post_string=null) {
    
        //Reset options (but using same cookie)
        $this->__construct();    

        $this->url = $url;
        $this->post_string = $post_string;
                
        //Set curl options
        $this->configCurl(CURLOPT_URL, $this->url);    
        $this->configCurl(CURLOPT_POSTFIELDS, $this->post_string);    
        
        if($this->authinfo) {
            $this->configCurl(CURLOPT_USERPWD,$this->authinfo);                
        }        
    
        //Set ref
        if($referer){
            $this->configCurl(CURLOPT_REFERER, $referer);    
        }


        //Do we need to follow manually?
        if($this->nofollow) {
            $this->configCurl(CURLOPT_FOLLOWLOCATION, false);            
        } else {        
            $this->follow_manually = !$this->configCurl(CURLOPT_FOLLOWLOCATION, true);                    
        }

        

        $page = curl_exec($this->curl);
        $error = curl_errno($this->curl);    

        if ($error != CURLE_OK || empty($page)) {

            return false;

        }        
        
        if($this->follow_manually) {
        return $this->manual_follow($page);
        } else {
        return $page;        
        }



    }

    /*
    GET function
    */    
    function get($url, $referer = '') {
        
        //Reset options (but using same cookie)
        $this->__construct();

        //Set curl options
        $this->configCurl(CURLOPT_URL, $url);    
        
        if($this->authinfo) {
            $this->configCurl(CURLOPT_USERPWD,$this->authinfo);                
        }        
    
        //Set ref
        if($referer){
            $this->configCurl(CURLOPT_REFERER, $referer);    
        }


        //Do we need to follow manually?
        if($this->nofollow) {
            $this->configCurl(CURLOPT_FOLLOWLOCATION, false);            
        } else {        
            $this->follow_manually = !$this->configCurl(CURLOPT_FOLLOWLOCATION, true);                    
        }


        $page = curl_exec($this->curl);
        $error = curl_errno($this->curl);    

        if ($error != CURLE_OK || empty($page)) {

            return false;

        }        
        
        if($this->follow_manually) {
        return $this->manual_follow($page);
        } else {
        return $page;        
        }

    }

    /*
    Read headers to follow a location manually
    */        
    function manual_follow($page) {
    
    //Need header for this
    $this->configCurl(CURLOPT_HEADER, 1);
    
    //Check is follow
    if(substr($this->get_response_code(),0,2) != "30") {
    $this->follow_url = "";
    } else {    
    preg_match("@Location:(.*?)\n@i", $page, $matches);
    $this->follow_url = trim($matches[1]);    
    }
    
    if($this->follow_url) {
    return $this->get($this->follow_url);
    } else {
    return $page;
    }
    
    
    }



};
//-----------------------------------------------------------------

class social_post {

    //Constructor
    function social_post(){ $this->__construct(); }
    function __construct($array=null) {    
        if(is_array($array)) {
            foreach($array AS $key=>$value) {
            $this->$key = $value;
            }
        }
    }
    
    /**
    Get fields in a page
    */
    function get_hiddens($page,$type='hidden') {
    
        preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$page,$elements);
    
        if(is_array($elements[0])) {
        
            foreach($elements[0] AS $key=>$value) {
                
                if(preg_match("@type=('|\")?$type@",$value)) {
                    
                    preg_match_all("@(name|value)=('|\")?([^('|\"|>|\s)]+)@",$value,$matches);
                    if(is_array($matches)) {
                    $return[$matches[3][0]] = $matches[3][1];
                    }
                
                }
            
            }
        
        }
        
        return $return;
    
    
    }        

    //Login into yahoo
    function yahoo_login($username,$password) {
    
        //Get login page
        $login_page = $this->curl->get('https://login.yahoo.com/config/login');
        
        //Get hidden fields
        $this->hiddens = $this->get_hiddens($login_page);

        //Query stringify (PHP4 compat)
        foreach($this->hiddens AS $key=>$value) {
        $query_string .= "$key=$value&";
        }
        
        //Add username and password
        $query_string = "" . $query_string . "login=$username&passwd=$password&.save=Sign+In";
                
        //Login in
        $page = $this->curl->post('https://login.yahoo.com/config/login', '' , $query_string);
    
    
    }
    
    //Get the answers homepage to check we are logged in
    function yahoo_check_account() {
    
        //Get answers homepage
        $page = $this->curl->get('http://answers.yahoo.com/');
        
        //Who are we?
        preg_match("@Welcome, <strong>(.*?)</strong>@",$page,$matches);
        
        echo "You are logged in as " . $matches[1];
    
    
    }
       

}

//Instantiate curl class
$curl = new curl_class(array('debug'=>false));

//Instantiate our social site class, using the curl object
$social_post = new social_post(array('curl'=>$curl));

//Now we can login
$social_post->yahoo_login("username","password");

//As we are still using the same class (with the same curl instance), we will be logged in for subsequent functions    
$social_post->yahoo_check_account();

?>
 
You beauty...

Rep given works like a charm. Ill now need to scour your code to see where I was going wrong!

thanks once again!
 
Status
Not open for further replies.