Fuck me.
It outputs something like this:
Code:
<?php
// Start output buffering so that everything echoed by the script is captured.
// The buffer is read back at the very end of the script and written to an .html file.
ob_start();
?>
<?php
// Article to scrape. Other pages this prototype has been tried against:
//   http://www.nydailynews.com/sports/olympics-2012/japan-kohei-uchimura-wins-olympic-gold-all-around-competition-american-danell-leyva-earns-bronze-article-1.1126690
//   http://latimesblogs.latimes.com/lanow/2012/08/chick-fil-a-kissing-protest-gladd.html
//   http://www.bbc.co.uk/sport/0/olympics/18905658
//   http://forums.darkfallonline.com/forumdisplay.php?f=13
$sourceUrl = 'http://www.eonline.com/news/334647/former-olympic-gymnast-nastia-liukin-scores-beauty-of-an-endorsement-named-face-of-tigi';

// Download the raw markup of the article.
$str = file_get_contents($sourceUrl);
if ($str === false) {
    // file_get_contents() returns false on failure. Without markup there is nothing to
    // scrape, so stop here instead of generating and saving an empty page.
    exit('Could not download ' . $sourceUrl);
}

// Disabled prototype: take the description and tags from a form instead of hard-coding them.
/*
$desc = $_GET["description"];
$tagging = $_GET["tags"];
$form= '<form action="scrape.php" method="get">
Description: <input type="text" name="description" />
tags: <input type="text" name="tags" />
<input type="submit" />
</form>';
echo $form;
echo $desc;
echo $tagging;
*/

// Navigation bar markup, rendered at the top of the generated page.
$links = '<div id="nav"><a href="">Home</a><a href="" style="margin-left:25px;">Popular News</a><a href="" style="margin-left:25px;">Next Article</a></div>';

// Intro text that makes it look like you're blogging the story.
// Should spin recurring keywords instead; this is just a prototype.
//$description = 'Well it looks like chik-fil-a is in the news regarding its CEOs stance on gay rights in America. Gay rights supporters are planning a National Same-Sex Kiss Day at Chick-fil-A';
$description = 'The olympics are raging in full force. Former U.S. gymnast has just signed on to become TIGIs newest spokesperson, promoting hair products.';

// Tag line printed underneath the scraped content.
//$tagging = 'chick fil a, chick-fila, chick-fil-a, chickfila, equality, gay men, men kissing';
$tagging = 'olympics, publicity, TIGI, ';
// Parse the downloaded markup into a DOM tree. The XML declaration prefix makes libxml
// treat the input as UTF-8.
$doc = new DOMDocument();
// Real-world pages are rarely valid HTML. Collect libxml's parse warnings internally and
// discard them, rather than muting every kind of error with "@".
$previousLibxmlMode = libxml_use_internal_errors(true);
$doc->loadHTML('<?xml encoding="UTF-8">' . $str);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlMode);

// Run the same markup through Tidy. $str holds the markup itself, not a path, so it has
// to go through parseString(); parseFile() would try to open it as a file name.
$tidy = new tidy();
$tidy->parseString($str);
$tidy->cleanRepair();
if (!empty($tidy->errorBuffer)) {
    // Tidy's diagnostics are printed into the (buffered) output.
    echo "The following errors or warnings occured:\n";
    echo $tidy->errorBuffer;
} else {
    // No diagnostics: keep the repaired markup, as a plain string rather than a tidy object.
    $str = tidy_get_output($tidy);
}
// Page shell: an empty <head>, the navigation bar ($links) inside #headTop and the intro
// paragraph ($description), followed by the opening tag of the #wrapper box that the
// scraped images and text are echoed into.
// The <head> is left empty on purpose: no $nav variable is assigned anywhere in this
// script, so interpolating it only produced an "undefined variable" warning.
echo '<html><head></head><body><div id="headTop" style="border:solid 1px silver;width:90%;min-height:10%;margin-left:5%;">'.$links.'</div><p style="margin-left:75px;">'.$description.'</p>';
echo '<div id="wrapper" style="background:silver;width:90%;min-height:40%;margin-left:5%;">';
// Running count of <p> elements; it is incremented while the paragraphs are scraped
// further down and is also used in the name of the saved file.
$counter = 0;

// Echo an <img> tag for every image that sits inside a <p> of the scraped page's <body>.
foreach ($doc->getElementsByTagName('body') as $bodyNode) {
    foreach ($bodyNode->getElementsByTagName('p') as $paragraphNode) {
        foreach ($paragraphNode->getElementsByTagName('img') as $imageNode) {
            // The src value comes from a remote page: escape it and quote the attribute
            // so it cannot break out of the tag.
            $src = htmlspecialchars($imageNode->getAttribute('src'), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            echo '<img src="' . $src . '" style="float:left;clear:right;"/><br />';
        }
    }
}
// Additionally echo the third <img> of the whole document, wherever it appears.
// item() returns null when the page has fewer than three images.
$thirdImage = $doc->getElementsByTagName('img')->item(2);
if ($thirdImage !== null) {
    // The src value comes from a remote page: escape it and quote the attribute.
    $src = htmlspecialchars($thirdImage->getAttribute('src'), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    echo '<img src="' . $src . '" style="float:left;clear:right;"/><br />';
}
//Scrape content in <p> tags
// Styling shared by all scraped blocks and images; emitted once as a well-formed
// <style> element instead of once per paragraph.
echo '<style type="text/css">div{margin:15px;} img{margin-right:15px;}</style>';

foreach ($doc->getElementsByTagName('p') as $paragraphNode) {
    // Count every paragraph, printed or not: the final total is used in the output file name.
    $counter++;
    // Only paragraphs 2 through 19 are copied into the page.
    if ($counter > 1 && $counter < 20) {
        // nodeValue is decoded text from a remote page, so it must be re-escaped
        // before being written back out as HTML.
        $text = htmlspecialchars($paragraphNode->nodeValue, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        echo '<div id="' . $counter . '">' . $text . '</div>';
    }
}
// Build the "Other news" link list from the .html files found in $directory.
// The links are rendered server-side into #otherNews3 so that nothing has to be
// injected by <script> tags emitted after the closing </html>.
//path to directory to scan
$directory = "";
// glob() can return false on error; treat that the same as "no files".
$pages = glob($directory . "*.html") ?: [];

$otherNewsLinks = '';
foreach ($pages as $page) {
    // File names are arbitrary text: URL-encode the name for the href and HTML-escape
    // both the attribute value and the visible label.
    $href = htmlspecialchars($directory . rawurlencode(basename($page)), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    $label = htmlspecialchars($page, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    $otherNewsLinks .= '<a href="' . $href . '" style="margin:30px;">' . $label . '</a><br />';
}

// Tag line, end of #wrapper, then the footer with its three boxes. The third box
// carries the "Other news" heading and the links built above.
echo '<p style="margin-left:15px;">tags:'.$tagging.'</p></div>'
    . '<div id="foot" style="border:solid 1px silver;width:90%;min-height:10%;margin-left:5%;">'
    . '<div id="otherNews" style="clear:right;width:30%;min-height:250px;border:solid 1px gray;float:right;"></div>'
    . '<div id="otherNews2" style="width:30%;min-height:250px;border:solid 1px gray;float:right;"></div>'
    . '<div id="otherNews3" style="width:30%;min-height:250px;border:solid 1px gray;"><h4>Other news:</h4><br />' . $otherNewsLinks . '</div>'
    . '</div>';
echo '</body></html>';

// Disabled experiment: list every image source found in the scraped <body>.
/*
//Search the doc for image tags, loop through and echo the source of each img
$bodd = $doc->getElementsByTagName('body')->item(0);
$tags = $bodd->getElementsByTagName('img');
//$tags = $doc->getElementsByTagName('img');
foreach ($tags as $tag) {
echo '<img src=' . $tag->getAttribute('src') . '/>';
echo $tag->getAttribute('src') . "<br/>" ;
}
//if (preg_match('/http/i', $str, $matches)) {
//echo "matched";
//}
*/
//-----------------------
/*
$dom = new DOMDocument;
$dom->loadHTMLFile('http://latimesblogs.latimes.com/lanow/2012/08/chick-fil-a-kissing-protest-gladd.html');
$xpath = new DOMXPath($dom);
$nodes = $xpath->query('/html/body//img');
foreach($nodes as $node) {
echo '<img src=' . $node->getAttribute('src') . '/>';
printf(
'Element %s - Image: %s%s',
$node->nodeName,
$node->getAttribute('src'),
PHP_EOL
);
}
*/
//---------------------------
/*
$paras = $doc->getElementsByTagName('p');
foreach ($paras as $par) {
echo "<br /> $par ";
//echo "<br/>" . $tag->getAttribute('href') ;
}
*/
/*
// The "i" after the pattern delimiter indicates a case-insensitive search
if (preg_match_all('/<img[^>]+>/i', $str, $matches)) {
echo "A match was found.";
echo $matches;
list($link) = split('[<]', $str);
echo "Month: $link; <br />\n";
} else {
echo "A match was not found.";
}
*/
?>
<?php
// Persist the generated page: take a snapshot of everything buffered so far and write it
// to disk. The file name embeds $counter, which by this point holds the number of <p>
// elements that were visited while scraping.
$pageMarkup = ob_get_contents();
$targetFile = 'chickFila' . $counter . '.html';
file_put_contents($targetFile, $pageMarkup);
?>