Après avoir lu un article fort intéressant sur le blog du Snook, j'ai écrit une fonction assez simple et qui se révèle particulièrement efficace. Comme je suis drôlement sympa, je la partage avec vous.
En savoir plus...
<?php
/**
 * Compute a heuristic spam score for a submitted comment.
 *
 * The score starts at 0 and is raised or lowered by a series of independent
 * checks (number of links, body length, spam keywords, URL length, typical
 * spam openers, author name, word count).
 *
 * Interpretation of the result:
 *   score >= 1   The message doesn't look like spam
 *   score == 0   The message should be put to moderation
 *   score < -10  The message is most certainly spam
 *
 * @param string     $body     Comment text; may contain HTML or BBCode links.
 * @param string     $url      URL supplied by the commenter.
 * @param string     $author   Name supplied by the commenter.
 * @param array|null $words    Extra spam keywords, checked in addition to the
 *                             built-in list (matched case-insensitively).
 * @param array|null $starters Extra suspicious opening words, checked in
 *                             addition to the built-in list.
 *
 * @return int The spam score; the lower it is, the more the message looks like spam.
 */
function wd_spamScore($body, $url, $author, $words=NULL, $starters=NULL)
{
    $score = 0;

    #
    # every check on the body is case-insensitive: work on a lower-cased copy
    #
    $body = strtolower((string) $body);

    #
    # how many links are in the body ? several indicators are counted and
    # the highest one wins, so a single link is not counted twice
    #
    $linkCount = max
    (
        substr_count($body, 'http://') + substr_count($body, 'https://'),
        substr_count($body, 'href'),
        substr_count($body, 'ftp')
    );

    if ($linkCount > 2)
    {
        # more than 2 links : -1 point per link
        $score -= $linkCount;
    }
    else
    {
        # 2 links or fewer : +2 points
        $score += 2;
    }

    #
    # now remove links so the remaining checks only look at the actual text.
    # The quantifiers are lazy so that each link is removed on its own and
    # the text standing between two links is preserved.
    #
    $linkPatterns = array
    (
        '#<a\s.+?</a>#s',       # html style: <a ...> ... </a>
        '#\[url.+?/url\]#s',    # bb style: [url] ... [/url]
        '#https?://[^\s]+#'     # remaining bare addresses
    );

    foreach ($linkPatterns as $pattern)
    {
        $stripped = preg_replace($pattern, '', $body);

        # preg_replace() returns NULL on a PCRE failure; keep the body as is then
        if ($stripped !== null)
        {
            $body = $stripped;
        }
    }

    #
    # how long is the body (links excluded) ?
    #
    $length = strlen($body);

    if ($length > 20 && $linkCount === 0)
    {
        # more than 20 characters and no link at all : +2 points
        $score += 2;
    }
    else if ($length < 20)
    {
        # less than 20 characters : -1 point
        $score--;
    }

    #
    # keyword search : -1 point per occurrence. array_merge() is used rather
    # than the '+' operator, which would drop the built-in entries whose
    # numeric key is already taken by a caller-supplied word.
    #
    $keywords = array_merge
    (
        empty($words) ? array() : (array) $words,
        array('levitra', 'viagra', 'casino', 'free sex', 'porn')
    );

    $seen = array();

    foreach ($keywords as $keyword)
    {
        $keyword = strtolower((string) $keyword);

        # skip empty needles (substr_count() rejects them) and duplicates
        if ($keyword === '' || isset($seen[$keyword]))
        {
            continue;
        }

        $seen[$keyword] = true;
        $score -= substr_count($body, $keyword);
    }

    #
    # URL length : long URLs are suspicious
    #
    if (strlen((string) $url) > 32)
    {
        $score--;
    }

    #
    # body starts with a typical spam opener (found as a word followed by a
    # space within the first characters of the body) : -10 points, once
    #
    $openers = array_merge
    (
        empty($starters) ? array() : (array) $starters,
        array('interesting', 'sorry', 'nice', 'cool', 'hi')
    );

    foreach ($openers as $opener)
    {
        $pos = strpos($body, strtolower((string) $opener) . ' ');

        if ($pos !== false && $pos <= 10)
        {
            $score -= 10;
            break;
        }
    }

    #
    # author's name has a web address in it. The match is tested explicitly
    # because an address at the very start of the name sits at offset 0.
    #
    if (preg_match('#https?://#i', (string) $author) === 1)
    {
        $score -= 2;
    }

    #
    # how many words are left in the body ? fewer than 10 : -5 points
    #
    if (str_word_count($body) < 10)
    {
        $score -= 5;
    }

    return $score;
}
?>