J'essaye de creer un code qui analyse mon titre, mon sous titre et mon text et en ressortir les 10 meilleurs keywords.
Pour ca je donne un poid de 5 pour les mots du titre, de 3 pour le soustitre et 1 pour le texte.
Puis je supprime les mots comme donc, en, fait, pour, qui,....
Voici mon code... Si quelqu'un peut y jeter un oeil et me dire ce qui cloche..
Il me ressort les mots mais sans les filtrer ou mal..
Merci beaucoup
function keywords()
{
$myFile = "testFile.txt";
$fh = fopen($myFile, 'r');
$theData = fgets($fh);
fclose($fh);
$filterarray = explode(" ", $theData);
$title = " starbucks a a a a coffee company computer company company shop shop computer shop good better than other coffee shop ";
$subtitle = " the the the the publication good computer the company";
$body = "hi every every every every every good every every computer good every every one how good how is going" ;
$a_title = explode(" ", $title );
$a_subtitle = explode(" ", $subtitle );
$a_body = explode(" ", $body );
$counttitle = array(sizeof($title));
$countsubtitle = array(sizeof($subtitle));
$countbody = array(sizeof($body));
$costttitle = array(sizeof($title));
$costsubtitle = array(sizeof($subtitle));
$costbody = array(sizeof($body));
$keytitle = array(sizeof($title));
$keysubtitle = array(sizeof($subtitle));
$keybody = array(sizeof($body));
$wsordcount= 0;
for($i=0; $i<count($filterarray); $i++)
{
echo"$filterarray[$i]<br>";
}
//for generating count for title
for($i=0;$i<sizeof($a_title);$i++)
{
if($a_title[$i]!="")
{
$wsordcount = substr_count($title,$a_title[$i]);
$counttitle[$i] = $wsordcount;
}
}
//for generating count for sub title
for($i=0;$i<sizeof($a_subtitle);$i++)
{
if($a_subtitle[$i]!="")
{
$wsordcount = substr_count( $subtitle, $a_subtitle[$i]);
$countsubtitle[$i] = $wsordcount;
}
}
//for generating count for body
for($i=0;$i<sizeof($a_body);$i++)
{
if($a_body[$i]!="")
{
$wsordcount = substr_count($body,$a_body[$i]);
$countbody[$i] = $wsordcount;
}
}
// for generating Cost
for($i=0;$i<sizeof($a_title);$i++)
{
$costttitle[$i]= $counttitle[$i] * 5;
}
for($i=0;$i<sizeof($a_subtitle);$i++)
{
$costsubtitle[$i]= $countsubtitle[$i] * 3;
}
for($i=0;$i<sizeof($a_body);$i++)
{
$costbody[$i]= $countbody[$i] * 2;
}
$mymax=sizeof($a_title);
// Gerating Key
$stw= "XX";
for ($j=0; $j <= $mymax ; $j++)
{
$k= $j;
$str1 = $a_title[$j];
for($i=$j+1; $i<=$mymax; $i++)
{
if ((strcmp( $str1, $a_title[$i])) == 0)
{
$a_title[$i] = $stw;
}
}
}
$mymaxsubtitle = sizeof($a_subtitle);
for ($j=0; $j <= $mymaxsubtitle ; $j++)
{
$k= $j;
$str1 = $a_subtitle[$j];
for($i=$j+1; $i<=$mymaxsubtitle; $i++)
{
if ((strcmp( $str1, $a_subtitle[$i])) == 0)
{
$a_subtitle[$i] = $stw;
}
}
}
$mymaxbody = sizeof($a_body);
for ($j=0; $j <= $mymaxbody ; $j++)
{
$k= $j;
$str1 = $a_body[$j];
for($i=$j+1; $i<=$mymaxbody; $i++)
{
if ((strcmp( $str1, $a_body[$i])) == 0)
{
$a_body[$i] = $stw;
}
}
}
//---------------------
$stw= "XX";
$k=0;
$max = count($a_title);
for($i=0; $i<$max; $i++)
{
$temp = $a_title[$i];
if ((strcmp( $stw , $a_title[$i])) != 0)
{
$keytitle[$k] = $temp ;
$cost = $costttitle[$i] ;
$costttitle[$k] = $cost;
$k++;
}
}
$stw= "XX";
$k=0;
$maxsub = sizeof($a_subtitle);
for($i=0; $i<= $maxsub; $i++)
{
if ((strcmp( $stw , $a_subtitle[$i])) != 0)
{
$keysubtitle[$k] = $temp ;
$cost = $costsubtitle[$i] ;
$costsubtitle[$k] = $cost;
$k++;
}
}
//-----------------------------------
$stw= "XX";
$k=0;
$maxbody= count($a_body);
for($i=0; $i<=$maxbody; $i++)
{
//$cost = $costttitle[$i];
$temp = $a_body[$i];
if ((strcmp( $stw , $a_body[$i])) != 0)
{
$keybody[$k] = $temp ;
$cost = $costbody[$i] ;
$costbody[$k] = $cost;
$k++;
}
}
$maxtitle = count($keytitle);
$maxsub= count($keysubtitle);
for($i=0; $i<$maxtitle; $i++)
{
$temp= $keytitle[$i];
// echo "cost = $costttitle[$i] and temp = $temp <br>";
for($j=0;$j<$maxsub;$j++)
{
if ((strcmp( $temp , $keysubtitle[$j])) == 0)
$costttitle[$i]= (int) $costsubtitle[$j] + (int) $costttitle[$i] ;
}
// echo "cost = $costttitle[$i] and temp = $temp <br>";
}
$counter = count($keytitle);
// echo "<br> $counter";
//-------------for adding to tile
$maxkey = count($keysubtitle);
for($i=0; $i<$maxkey; $i++)
{
$counter++;
$temp= $keysubtitle[$i];
// echo "cost = $costttitle[$i] and temp = $temp <br>";
for($j=0;$j<$maxkey;$j++)
{
if ((strcmp( $temp , $keybody[$j])) == 0)
{
$costbody[$i]= (int)$costsubtitle[$i] + (int)$costbody[$j] ;
// echo " <br>trying $costbody[$i]";
}
//$keytitle[$counter]= $keysubtile[$i];
//$costttile[$counter] = $costbody[$counter];
// echo "cost = $costbody[$counter] and temp = $temp <br>";
}
$keytitle[$counter]= $keysubtitle[$i];
$costttile[$counter] = $costbody[$i];
// echo "cost = $costttile[$counter] and temp = $temp <br>";
}
$maxbody = count($keybody);
for($i=0; $i<= $maxbody; $i++)
{
$counter++;
$keytitle[$counter] = $keybody[$i];
$costttitle[$counter] = $costbody[$i];
}
for($i=0; $i<30; $i++)
{
}
//---------------------------------------------------check this out ---------------------------------------------
$stw= "XX";
$mymax = count($keytitle);
for ($j=0; $j <= $mymax ; $j++)
{
$k= $j;
$str1 = $keytitle[$j];
// echo "hello";
for($i=$j+1; $i<=$mymax; $i++)
{
if ((strcmp( $str1, $keytilte[$i])) == 0)
{
// $keytitle[$i] = $stw;
//$a_title[$i] ="XX";
}
}
// echo "$keytitle[$j] <BR>";
}
//-------------------------for replacing filteres
for($i=0; $i< $mymax; $i++)
{
// echo"$keytitle[$i]<br>";
}
echo"<BR> before <BR>";
for($i=0; $i<count($keytitle); $i++)
{
echo"$keytitle[$i]<br>";
}
for ($k=0; $k <= count($keytitle) ; $k++)
{
//$k= $j;
$str1 = $keytitle[$k];
// echo "string is $str1";
for($i=0; $i<= sizeof($filterarray); $i++)
{
if ((strcmp( $str1, $filterarray[$i])) == 0)
{
// $a_title[$i] ="XX";
$keytitle[$k];
//"$keytitle[$j] <BR>";
}
}
// echo "$keytitle[$j] <BR>";
}
echo"<BR> next <BR>";
for($i=0; $i<count($keytitle); $i++)
{
echo"$keytitle[$i]<br>";
}
$k=0;
$maxkey = count($keytitle) + count($keybody);
for($i=0; $i<= 30; $i++)
{
// echo" <BR>$keytitle[$i] and cost = $costttitle[$i] ";
if ((strcasecmp( $stw, $keytitle[$i]))!= 0)
{
$keyword[$k] = $keytitle[$i];
$keycost[$k] = $costttitle[$i];
$k++;
//$keytitle[$j] = $stw;
//$a_title[$i] ="XX";
}
}
//---------------------------------------------
$maxkeyword= count($keyword);
for( $i= 0; $i<=$maxkeyword; $i++)
{
$temp = $keycost[$i];
$temp1 = $keycost[$i];
$id = $i;
$temp2 = $keyword[$i];
for ($j=$i; $j<$maxkeyword; $j++)
{
if($temp1 <= $keycost[$j])
{
$temp1= $keycost[$j];
$tid=$j;
}
}
$keycost[$id] = $temp1;
$keycost[$tid] = $temp;
$keyword[$id] = $keyword[$tid];
$keyword[$tid] = $temp2;
}
for($i=0; $i<$maxkeyword; $i++)
{
// if((strcasecmp( " ", $$keyword[$i])) != 0)
// echo" $keyword[$i] ";
}
}
keywords();
[php] ?>
[/php]