| 
<!Doctype html>
<html>
 <head>
 <title>My crawler</title>
 </head>
 <body>
 
 <?php
 
 //error_reporting(-1);
 
 /*Example link crawler
 *@package :Simple crawler
 *author : Trev Tune
 */
 
 include 'crawler.class.php';
 
 /* @seenlinks
 *
 *A multidimentional array of all crawled links
 *
 *e.g print_r($seenlinks) may produce
 array(
 [domain]=>array( [link1]=>link;
 )
 */
 
 
 $url=trim($_GET['url']);
 
 $seenlinks=array();
 
 
 crawl($url,2);
 
 
 function crawl($url,$depth=2,$singledomain=false)
 {
 
 $domain=host($url);
 
 global $seenlinks;
 
 //Have we crawled into the specified depth
 
 if ($depth==0)
 return;
 
 //Have we crawled this url
 
 if(isset($seenlinks[$domain][$url]))
 return;
 
 $crawler=new crawler($url);
 
 if(!$crawler)
 return;
 
 //$crawler->getLinks();
 
 //Add to array
 
 
 $seenlinks[$domain][$url]=$url;
 
 $links=$crawler->getLinks();
 
 
 foreach($links as $link=>$a)
 
 {
 
 //Does user want to crawl only a specific domain
 
 if($singledomain)
 {
 if($domain=!host($link))
 break;}
 
 
 crawl($link,$depth-1,$singledomain);}
 }
 
 function host($url)
 {
 /*
 $host = parse_url($url);
 $host = $host['host'];
 */
 
 $host = str_ireplace('www.','', parse_url($url, PHP_URL_HOST));
 
 return $host;
 }
 
 if(count($seenlinks)==0)
 {die("No links found for $url");}
 
 echo "
 
 <div class='crawler'>
 Domains/subdomains found = " . count($seenlinks) . "<br/>";
 
 
 foreach($seenlinks as $domain=>$links){
 
 echo "<br/> Domain ".$domain. " has "  . count($links) . "links <br>";
 
 foreach($links as $link){
 {
 echo " <br/>  $link <br/>";
 
 }
 
 echo "<hr/>";
 }
 
 echo "<hr/>";
 }
 
 ?>
 
 </div>
 </body>
 </html>
 |