Voici une fonction qui sépare le prénom du nom dans une chaîne de caractères de la forme : prénom nom.
Ça ne sert à rien, mais c'est joli.
Ce n'est pas parfait, mais cela permet quand même de découper la chaîne de manière cohérente.
voici les résultats obtenus :
envoyé : antoine zoccoliere
prénom : antoine
nom : zoccoliere
envoyé : antoine de la zoccoliere
prénom : antoine
nom : de la zoccoliere
envoyé : antoine françois zoccoliere
prénom : antoine
nom : françois zoccoliere
envoyé : antoine-françois zoccoliere
prénom : antoine-françois
nom : zoccoliere
envoyé : antoine françois paul de la zoccoliere
prénom : antoine
nom : françois paul de la zoccoliere
envoyé : antoine François Paul de la zoccoliere
prénom : antoine François Paul
nom : de la zoccoliere
envoyé : antoine françois paul DE LA ZOCCOLIERE
prénom : antoine
nom : françois paul DE LA ZOCCOLIERE
envoyé : antoine François Paul DE LA ZOCCOLIERE
prénom : antoine François Paul
nom : DE LA ZOCCOLIERE
envoyé : antoine François Paul De La Zoccoliere
prénom : antoine François Paul
nom : De La Zoccoliere
et c'est parti :
<?php
/**
 * Splits a "firstname lastname" string into a first-name part and a last-name part.
 *
 * With one or two words the split is positional (word 0 is the first name,
 * word 1 the last name, missing parts are empty strings). With three or more
 * words every word collects "votes":
 *   - the first word always votes first name, the last word always votes last name;
 *   - a middle word written entirely in upper case votes last name;
 *   - a middle word shorter than 4 bytes votes last name (particles: "de", "la", ...);
 *   - a middle word equal to its "Ucfirst" form votes first name.
 * Each word goes to the side with the most votes; a word with no vote at all
 * goes to the last name.
 *
 * NOTE(review): case and length tests rely on strtolower/strtoupper/ucfirst/strlen,
 * which work on bytes; words whose first letter is a multibyte character are
 * probably not classified reliably -- confirm against the expected input encoding.
 *
 * @param string $entire_name Full name, words separated by spaces.
 * @param string $firstname   Output: detected first name (words joined by one space).
 * @param string $lastname    Output: detected last name (words joined by one space).
 * @return string The value received in $entire_name, unchanged.
 */
function ExtractCustomerFirstAndLastNameFromString($entire_name, &$firstname, &$lastname){
    // Split on spaces and drop the empty tokens produced by leading, trailing
    // or repeated spaces, so they are never treated as words.
    $name_array = array_values(array_filter(explode(' ', (string) $entire_name), 'strlen'));
    $nb_wd = count($name_array);

    // Zero, one or two words: purely positional, no heuristic needed.
    if($nb_wd <= 2){
        $firstname = isset($name_array[0]) ? $name_array[0] : '';
        $lastname = isset($name_array[1]) ? $name_array[1] : '';
        return $entire_name;
    }

    $first_keys = array(); // indexes of the words kept as first name, ascending
    $last_keys = array();  // indexes of the words kept as last name
    $last_choice = 'f';    // side chosen by the most recent non-tied vote

    for($i = 0; $i < $nb_wd; $i++){
        $name_part = $name_array[$i];
        $first_votes = 0;
        $last_votes = 0;

        if($i == 0){
            // No doubt on the first word.
            $first_votes = 1;
        }elseif($i == $nb_wd - 1){
            // No doubt on the last word.
            $last_votes = 1;
        }else{
            $lower = strtolower($name_part);
            // Entirely upper case: last name.
            if($name_part === strtoupper($lower)){
                $last_votes++;
            }
            // Very short word: most likely a particle of the last name.
            if(strlen($name_part) < 4){
                $last_votes++;
            }
            // Only the initial is upper case: first name.
            if($name_part === ucfirst($lower)){
                $first_votes++;
            }
        }

        if($first_votes > $last_votes){
            $to_first = true;
            $last_choice = 'f';
        }elseif($first_votes < $last_votes){
            $to_first = false;
            $last_choice = 'l';
        }elseif($first_votes > 0){
            // Tie: only the word right after the first one may follow a
            // previous "first name" decision; any later word goes to the last name.
            $to_first = ($last_choice == 'f' && $i < 2);
        }else{
            // No clue at all: a last name is the more plausible guess.
            $to_first = false;
        }

        if($to_first){
            $first_keys[] = $i;
        }else{
            $last_keys[] = $i;
        }
    }

    // When more than two words were tagged as first name but they do not form
    // the contiguous run 0..n-1, keep only the leading contiguous words (at most
    // two) and hand the rest over to the last name. The first word is always kept.
    // NOTE(review): with exactly two non-contiguous first-name words nothing is
    // moved, so the first name can skip over a word -- confirm this is intended.
    $first_count = count($first_keys);
    if($first_count > 2 && $first_keys !== range(0, $first_count - 1)){
        $kept = array();
        foreach($first_keys as $pos => $key){
            if($key === $pos && $pos < 2){
                $kept[] = $key;
            }else{
                $last_keys[] = $key;
            }
        }
        $first_keys = $kept;
    }

    // Restore the original word order inside the last name.
    sort($last_keys);

    $first_words = array();
    foreach($first_keys as $key){
        $first_words[] = $name_array[$key];
    }
    $last_words = array();
    foreach($last_keys as $key){
        $last_words[] = $name_array[$key];
    }

    // implode() avoids the leading space that word-by-word concatenation produces.
    $firstname = implode(' ', $first_words);
    $lastname = implode(' ', $last_words);
    return $entire_name;
}
// Demo: run the splitter on each sample name and print the input together
// with the detected first name and last name, one HTML paragraph per sample.
$samples = array(
    'antoine zoccoliere',
    'antoine de la zoccoliere',
    'antoine françois zoccoliere',
    'antoine-françois zoccoliere',
    'antoine françois paul de la zoccoliere',
    'antoine François Paul de la zoccoliere',
    'antoine françois paul DE LA ZOCCOLIERE',
    'antoine François Paul DE LA ZOCCOLIERE',
    'antoine François Paul De La Zoccoliere',
);
foreach($samples as $sample){
    // The function returns the string it was given and fills the two by-reference outputs.
    $a = ExtractCustomerFirstAndLastNameFromString($sample, $firstname, $lastname);
    echo 'envoyé : '.$a.'<br>prénom : '.$firstname.'<br>nom : '.$lastname.'<br><br><br>'.chr(10);
}
?>
PS : pour ajouter une pondération, par exemple si tous vos noms commencent par *, il suffit d'ajouter une section du type :
//in usage : if the word is entierly in upper case it's a name
if($name_part == $upper_after_lower){
$tagged['name']['nb']++;
$tagged['name']['keys'][] = $i;
if(isset($tagged['common'][$i])){
$tagged['common'][$i]++;
}else{
$tagged['common'][$i] = 1;
}
}
qui, à votre sauce, deviendrait :
//special case : all names contain an *
if(strpos($name_part,'*')!==FALSE){
$tagged['name']['nb']++;
$tagged['name']['keys'][] = $i;
if(isset($tagged['common'][$i])){
$tagged['common'][$i]++;
}else{
$tagged['common'][$i] = 1;
}
}
Voilà.