Code : Tout sélectionner
/**
 * Tells whether $value occurs anywhere inside a (possibly nested) array.
 *
 * Elements are compared with the identity operator (===). A loose comparison
 * would report false positives such as '1e3' == '1000', or, before PHP 8,
 * any non-numeric string == 0.
 *
 * @param mixed $value Value to look for.
 * @param mixed $array Array to search; anything that is not an array yields false.
 * @param int   $prof  Current nesting depth; only forwarded to recursive calls.
 * @return bool True as soon as one element at any depth is identical to $value.
 */
function in_multi_array($value, $array, $prof=0)
{
    // Callers may hand over a variable that was never initialised (null).
    if (!is_array($array)) {
        return false;
    }

    foreach ($array as $item) {
        if ($item === $value) {
            return true;
        }
        // Descend into sub-arrays; the recursion covers every deeper level.
        if (is_array($item) && in_multi_array($value, $item, $prof + 1)) {
            return true;
        }
    }

    return false;
}
Code : Tout sélectionner
Array
(
[0] => array(0, 'http://www.jscourses.com')
[1] => array(1, 'http://www.jscourses.com/')
[2] => array(1, 'http://www.jscourses.com/services')
[3] => array(2, 'http://www.jscourses.com/tarifs')
[4] => array(2, 'http://www.jscourses.com/reserver')
[5] => array(0, 'http://www.jscourses.com/gare-de-lyon')
[6] => array(0, 'http://www.jscourses.com/gare-de-lyon/taxi-moto-gare-de-lyon')
Code : Tout sélectionner
Array
(
[0] => array(0, 'http://www.jscourses.com')
[1] => array(1, 'http://www.jscourses.com', 'http://www.jscourses.com/')
[2] => array(1, 'http://www.jscourses.com', 'http://www.jscourses.com/services')
[3] => array(2, 'http://www.jscourses.com/services', 'http://www.jscourses.com/tarifs')
[4] => array(2, 'http://www.jscourses.com/services', 'http://www.jscourses.com/reserver')
[5] => array(0, 'http://www.jscourses.com/gare-de-lyon')
[6] => array(0, 'http://www.jscourses.com/gare-de-lyon/taxi-moto-gare-de-lyon')
Code : Tout sélectionner
Array
(
[0] => array('niv' => 0, 'adrs' => 'http://www.jscourses.com', 'orig' => '')
[1] => array('niv' => 1, 'adrs' => 'http://www.jscourses.com', 'orig' => 'http://www.jscourses.com/')
[2] => array('niv' => 1, 'adrs' => 'http://www.jscourses.com', 'orig' => 'http://www.jscourses.com/services')
[3] => array('niv' => 2, 'adrs' => 'http://www.jscourses.com/services', 'orig' => 'http://www.jscourses.com/tarifs')
[4] => array('niv' => 2, 'adrs' => 'http://www.jscourses.com/services', 'orig' => 'http://www.jscourses.com/reserver')
[5] => array('niv' => 0, 'adrs' => 'http://www.jscourses.com/gare-de-lyon', 'orig' => '')
[6] => array('niv' => 0, 'adrs' => 'http://www.jscourses.com/gare-de-lyon/taxi-moto-gare-de-lyon','orig' => '')
Code : Tout sélectionner
/**
 * Records the href of every <a> element found on the page at $url.
 *
 * Each link is appended to $urls as
 * array('niv' => $prof, 'adrs' => href, 'orig' => $orig).
 *
 * @param string $url    Address handed to crawlcurl() to obtain the markup.
 * @param array  $urls   Accumulator, filled in place through the reference.
 * @param int    $limite Not read by this version of the function; only forwarded.
 * @param int    $prof   Depth stored under 'niv' for every recorded link.
 * @param string $orig   Value stored under 'orig' for every recorded link.
 * @return array The accumulator $urls.
 */
function crawl($url, &$urls, $limite, $prof=0, $orig='') // a change was made here
{
    $code_brute = crawlcurl($url);
    // DOMDocument::loadHTML() rejects an empty source, so stop early when
    // nothing usable was fetched.
    if (!is_string($code_brute) || $code_brute === '') {
        return $urls;
    }

    $doc = new DOMDocument();
    $doc->loadHTML($code_brute);
    $balise_a = $doc->getElementsByTagName('a');
    foreach ($balise_a as $balise) {
        $href = $balise->getAttribute('href');
        // if( !in_multi_array($href, $urls) && strstr($href, racine($url)) && !strstr($href, 'pdf') )
        // New processing core
        if (is_array($href)) {
            // NOTE(review): DOMElement::getAttribute() is documented to return
            // a string, so this branch is not expected to run - confirm intent.
            foreach ($href as $val) {
                crawl($val, $urls, $limite, ($prof + 1), $orig);
            }
        } else {
            // The depth lives in $prof; the former $niv was never defined,
            // which left every 'niv' entry empty.
            $urls[] = array('niv' => $prof, 'adrs' => $href, 'orig' => $orig);
        }
    }

    return $urls;
}
Code : Tout sélectionner
Array
(
[0] => Array
(
[niv] =>
[adrs] => tel:0123456789
[orig] =>
)
[1] => Array
(
[niv] =>
[adrs] => http://www.jscourses.com
[orig] =>
)
[2] => Array
(
[niv] =>
[adrs] => http://www.jscourses.com/
[orig] =>
)
[3] => Array
(
[niv] =>
[adrs] => http://www.jscourses.com/services
[orig] =>
)
[4] => Array
(
[niv] =>
[adrs] => http://www.jscourses.com/tarifs
[orig] =>
)
[5] => Array
(
[niv] =>
[adrs] => http://www.jscourses.com/reserver
[orig] =>
)
[6] => Array
(
[niv] =>
[adrs] => #
[orig] =>
)
[7] => Array
(
[niv] =>
[adrs] => #
[orig] =>
)
[8] => Array
(
[niv] =>
[adrs] => http://www.jscourses.com/reserver
[orig] =>
)
[9] => Array
(
[niv] =>
[adrs] => http://www.jscourses.com/gare-de-lyon
[orig] =>
)
[10] => Array
(
[niv] =>
[adrs] => http://www.jscourses.com/gare-de-lyon/taxi-moto-gare-de-lyon
[orig] =>
)
[11] => Array
(
[niv] =>
[adrs] =>
[orig] =>
)
[12] => Array
(
[niv] =>
[adrs] => http://www.foxinteractive.fr
[orig] =>
)
)
Code : Tout sélectionner
/**
 * Collects the links of the page at $url and, while $limite is positive,
 * crawls each of them in turn.
 *
 * A link is kept when it is not already in the local result, contains
 * racine($url) (defined elsewhere - presumably the site root) and does not
 * contain 'pdf'. The result of each nested crawl is appended as ONE element,
 * so the returned array is nested: one level per crawl depth.
 *
 * @param string $url    Address handed to crawlcurl() to obtain the markup.
 * @param int    $limite Remaining crawl depth; links are followed only while > 0.
 * @return array Kept hrefs of this page, followed by one sub-array per followed link.
 */
function crawl($url, $limite)
{
    // Both lists must exist even when no link qualifies; otherwise count()
    // and the return value would work on null.
    $urls = array();
    $ecurls = array();

    $code_brute = crawlcurl($url);
    // DOMDocument::loadHTML() rejects an empty source.
    if (!is_string($code_brute) || $code_brute === '') {
        return $urls;
    }

    $doc = new DOMDocument();
    $doc->loadHTML($code_brute);
    foreach ($doc->getElementsByTagName('a') as $balise) {
        $href = $balise->getAttribute('href');
        if (!in_multi_array($href, $urls) && strstr($href, racine($url)) && !strstr($href, 'pdf')) {
            $ecurls[] = $href;
            $urls[] = $href;
        }
    }

    // The depth test does not depend on the link, so it is done once.
    if ($limite > 0) {
        foreach ($ecurls as $ecurl) {
            $urls[] = crawl($ecurl, ($limite - 1));
        }
    }

    return $urls;
}
Code : Tout sélectionner
/**
 * Collects the links of the page at $url as flat records and, while $limite
 * is positive, merges in the records of each linked page.
 *
 * A link is kept when it is not already in the local result, contains
 * racine($url) (defined elsewhere - presumably the site root) and does not
 * contain 'pdf'. Each kept link becomes
 * array('lvl' => "$lvl", 'parent' => $url, 'url' => href).
 *
 * NOTE(review): the duplicate check only sees the records gathered by the
 * current call, so a page reached from two different parents is recorded
 * (and crawled) once per parent.
 *
 * @param string $url    Address handed to crawlcurl() to obtain the markup.
 * @param int    $limite Remaining crawl depth; links are followed only while > 0.
 * @param int    $lvl    Depth of $url, stored as a string under 'lvl'.
 * @return array Flat list of link records for this page and the pages below it.
 */
function crawl($url, $limite=0, $lvl=0)
{
    // Both lists must exist even when no link qualifies; otherwise count()
    // and the return value would work on null.
    $urls = array();
    $ecurls = array();

    $code_brute = crawlcurl($url);
    // DOMDocument::loadHTML() rejects an empty source.
    if (!is_string($code_brute) || $code_brute === '') {
        return $urls;
    }

    $doc = new DOMDocument();
    $doc->loadHTML($code_brute);
    foreach ($doc->getElementsByTagName('a') as $balise) {
        $href = $balise->getAttribute('href');
        if (!in_multi_array($href, $urls) && strstr($href, racine($url)) && !strstr($href, 'pdf')) {
            $ecurls[] = $href;
            $urls[] = array('lvl' => "$lvl", 'parent' => $url, 'url' => $href);
        }
    }

    // The depth test does not depend on the link, so it is done once.
    if ($limite > 0) {
        foreach ($ecurls as $ecurl) {
            $crawl = crawl($ecurl, ($limite - 1), ($lvl + 1));
            $urls = array_merge($urls, $crawl);
        }
    }

    return $urls;
}