distinguer, rechercher, différencier nom du prénom

Petit nouveau ! | 1 Messages

14 janv. 2011, 19:55

bonjour,

Voici une fonction qui sépare le prénom du nom dans une chaîne de caractères écrite dans l'ordre : prénom nom.

ça sert à rien mais c'est joli.

Ce n'est pas parfait, mais cela permet quand même de découper la chaîne de manière cohérente.

voici les résultats obtenus :
envoyé : antoine zoccoliere
prénom : antoine
nom : zoccoliere

envoyé : antoine de la zoccoliere
prénom : antoine
nom : de la zoccoliere


envoyé : antoine françois zoccoliere
prénom : antoine
nom : françois zoccoliere

envoyé : antoine-françois zoccoliere
prénom : antoine-françois
nom : zoccoliere

envoyé : antoine françois paul de la zoccoliere
prénom : antoine
nom : françois paul de la zoccoliere


envoyé : antoine François Paul de la zoccoliere
prénom : antoine François Paul
nom : de la zoccoliere

envoyé : antoine françois paul DE LA ZOCCOLIERE
prénom : antoine
nom : françois paul DE LA ZOCCOLIERE

envoyé : antoine François Paul DE LA ZOCCOLIERE
prénom : antoine François Paul
nom : DE LA ZOCCOLIERE

envoyé : antoine François Paul De La Zoccoliere
prénom : antoine François Paul
nom : De La Zoccoliere

et c'est parti :
<?php

	/**
	 * Splits a full name written as "firstname lastname" into its two parts.
	 *
	 * The first word is always tagged as first name and the last word as last
	 * name. With more than two words, every word in between is tagged by three
	 * heuristics:
	 *   - identical to its upper-cased form           => last name
	 *   - shorter than 4 bytes ("de", "la", ...)      => last name
	 *   - identical to ucfirst() of its lower-cased form => first name
	 * A word goes to the side that tagged it most often. On a tie it goes to the
	 * first name only when it is the second word and the previous decision was
	 * "first name"; otherwise it goes to the last name. A word that received no
	 * tag at all goes to the last name.
	 *
	 * NOTE(review): the heuristics use strtolower()/strtoupper()/ucfirst()/strlen(),
	 * which operate on bytes; accented capitals are presumably not detected the
	 * same way as ASCII ones - confirm against the encoding of the real input.
	 *
	 * @param string $entire_name full name, words separated by spaces
	 * @param string $firstname   output: first-name words joined by a single space
	 * @param string $lastname    output: last-name words joined by a single space
	 * @return string $entire_name, returned unchanged
	 */
	function ExtractCustomerFirstAndLastNameFromString($entire_name, &$firstname, &$lastname){
		//we explode the string and drop the empty chunks that repeated, leading
		//or trailing spaces would otherwise turn into bogus "words"
		$name_array = array();
		foreach(explode(' ', (string)$entire_name) as $chunk){
			if($chunk !== ''){
				$name_array[] = $chunk;
			}
		}
		$nb_wd = count($name_array);

		//zero, one or two words : nothing to guess, a missing part is an empty string
		if($nb_wd <= 2){
			$firstname = isset($name_array[0]) ? $name_array[0] : '';
			$lastname = isset($name_array[1]) ? $name_array[1] : '';
			return $entire_name;
		}

		//'keys' lists the word indexes tagged for each side (one entry per tag, so
		//an index may appear several times), 'nb' counts those tags.
		//'common' counts every tag a word received; it is only written here, never read.
		$tagged = array();
		$tagged['name'] = array();
		$tagged['name']['nb'] = 0;
		$tagged['name']['keys'] = array();
		$tagged['firstname'] = array();
		$tagged['firstname']['nb'] = 0;
		$tagged['firstname']['keys'] = array();
		$tagged['common'] = array();

		for($i=0; $i<$nb_wd; $i++){
			$name_part = $name_array[$i];
			if($i == 0){//no doubt on the first word : it is a firstname
				$tagged['firstname']['nb']++;
				$tagged['firstname']['keys'][] = $i;
				if(isset($tagged['common'][$i])){
					$tagged['common'][$i]++;
				}else{
					$tagged['common'][$i] = 1;
				}
			}elseif($i == ($nb_wd-1)){//no doubt on the last word : it is a name
				$tagged['name']['nb']++;
				$tagged['name']['keys'][] = $i;
				if(isset($tagged['common'][$i])){
					$tagged['common'][$i]++;
				}else{
					$tagged['common'][$i] = 1;
				}
			}else{
				//case variants of the word, used by the heuristics below
				$lower = strtolower($name_part);
				$upper_after_lower = strtoupper($lower);
				$ucf = ucfirst($lower);

				//in usage : if the word is entirely in upper case it's a name
				if($name_part == $upper_after_lower){
					$tagged['name']['nb']++;
					$tagged['name']['keys'][] = $i;
					if(isset($tagged['common'][$i])){
						$tagged['common'][$i]++;
					}else{
						$tagged['common'][$i] = 1;
					}
				}
				//in usage : if the word is less than 4 characters long it's part of the name
				if(strlen($name_part) < 4){
					$tagged['name']['nb']++;
					$tagged['name']['keys'][] = $i;
					if(isset($tagged['common'][$i])){
						$tagged['common'][$i]++;
					}else{
						$tagged['common'][$i] = 1;
					}
				}
				//in usage : if the word has only its first letter in upper case it's a firstname
				if($name_part == $ucf){
					$tagged['firstname']['nb']++;
					$tagged['firstname']['keys'][] = $i;
					if(isset($tagged['common'][$i])){
						$tagged['common'][$i]++;
					}else{
						$tagged['common'][$i] = 1;
					}
				}
			}
		}

		//we count how many times each word was tagged for each side and drop the
		//duplicated entries from the key lists
		$fname_len = count($tagged['firstname']['keys']);
		$lname_len = count($tagged['name']['keys']);
		$fkeys = array();
		for($i=0;$i<$fname_len;$i++){
			if(isset($fkeys[$tagged['firstname']['keys'][$i]])){
				$fkeys[$tagged['firstname']['keys'][$i]]++;
				unset($tagged['firstname']['keys'][$i]);
				$tagged['firstname']['nb']--;
			}else{
				$fkeys[$tagged['firstname']['keys'][$i]] = 1;
			}
		}
		$lkeys = array();
		for($i=0;$i<$lname_len;$i++){
			if(isset($lkeys[$tagged['name']['keys'][$i]])){
				$lkeys[$tagged['name']['keys'][$i]]++;
				unset($tagged['name']['keys'][$i]);
				$tagged['name']['nb']--;
			}else{
				$lkeys[$tagged['name']['keys'][$i]] = 1;
			}
		}
		//we clean a little : reindex the lists after the unset() calls
		$tagged['firstname']['keys'] = array_values($tagged['firstname']['keys']);
		$tagged['name']['keys'] = array_values($tagged['name']['keys']);

		//each word is now attached to the side that tagged it most often
		$last_choice = '';
		for($i=0; $i<$nb_wd; $i++){
			//a side that never tagged the word counts as 0 (no undefined-key read)
			$f_votes = isset($fkeys[$i]) ? $fkeys[$i] : 0;
			$l_votes = isset($lkeys[$i]) ? $lkeys[$i] : 0;

			if($f_votes == 0 && $l_votes == 0){
				//not tagged at all : attached to the last name, it seems more likely
				$tagged['name']['keys'][] = $i;
				$tagged['name']['nb']++;
				continue;
			}

			if($f_votes > $l_votes){//it seems to be more part of firstname than name
				$choice = 'f';
				$last_choice = 'f';
			}elseif($f_votes < $l_votes){//it seems to be more part of name than firstname
				$choice = 'l';
				$last_choice = 'l';
			}else{
				//tie : firstname only right after a firstname and within the first two
				//words; a tie does not update $last_choice
				$choice = ($last_choice == 'f' && $i < 2) ? 'f' : 'l';
			}

			if($choice == 'f'){
				$j = array_search($i, $tagged['name']['keys'], true);
				if($j !== false){
					unset($tagged['name']['keys'][$j]);
					$tagged['name']['nb'] -= $l_votes;
				}
				if(array_search($i, $tagged['firstname']['keys'], true) === false){
					$tagged['firstname']['keys'][] = $i;
					$tagged['firstname']['nb']++;
				}else{
					$tagged['firstname']['nb'] -= ($f_votes-1);
				}
			}else{
				$j = array_search($i, $tagged['firstname']['keys'], true);
				if($j !== false){
					unset($tagged['firstname']['keys'][$j]);
					$tagged['firstname']['nb'] -= $f_votes;
				}
				if(array_search($i, $tagged['name']['keys'], true) === false){
					$tagged['name']['keys'][] = $i;
					$tagged['name']['nb']++;
				}else{
					$tagged['name']['nb'] -= ($l_votes-1);
				}
			}
		}
		//we clean a little...again
		$tagged['firstname']['keys'] = array_values($tagged['firstname']['keys']);
		$tagged['name']['keys'] = array_values($tagged['name']['keys']);

		//we check that the firstname words follow each other from the start :
		//the sum of their indexes must equal 0 + 1 + ... + (count - 1)
		$fname_len = count($tagged['firstname']['keys']);
		$ftest = 0;
		$fconf = 0;
		for($i=0;$i<$fname_len;$i++){
			$ftest = $ftest + $tagged['firstname']['keys'][$i];
			$fconf = $fconf + $i;
		}
		if($ftest != $fconf && $tagged['firstname']['nb'] > 2){
			//more than 2 firstname parts with a gap : the running sums restart from
			//zero so that the leading words that do follow each other are kept;
			//without the restart every word, the first one included, was moved
			$ftest = 0;
			$fconf = 0;
			for($i=0;$i<$fname_len;$i++){
				$ftest = $ftest + $tagged['firstname']['keys'][$i];
				$fconf = $fconf + $i;
				if($fconf != $ftest || $i > 1){//we transfer it to the lastname
					$tagged['name']['keys'][] = $tagged['firstname']['keys'][$i];
					$tagged['name']['nb']++;
					unset($tagged['firstname']['keys'][$i]);
					$tagged['firstname']['nb']--;
				}
			}
		}

		//it's now all ok : words go back in their original order (sort() also reindexes)
		sort($tagged['firstname']['keys']);
		sort($tagged['name']['keys']);

		//joined with single spaces, so no stray leading space in the results
		$fname_parts = array();
		foreach($tagged['firstname']['keys'] as $key){
			$fname_parts[] = $name_array[$key];
		}
		$lname_parts = array();
		foreach($tagged['name']['keys'] as $key){
			$lname_parts[] = $name_array[$key];
		}

		$firstname = implode(' ', $fname_parts);
		$lastname = implode(' ', $lname_parts);

		return $entire_name;
	}

	
	
	//Demo : every sample name goes through the splitter and the result is
	//printed as an HTML fragment followed by a line feed.
	$samples = array(
		'antoine zoccoliere',
		'antoine de la zoccoliere',
		'antoine françois zoccoliere',
		'antoine-françois zoccoliere',
		'antoine françois paul de la zoccoliere',
		'antoine François Paul de la zoccoliere',
		'antoine françois paul DE LA ZOCCOLIERE',
		'antoine François Paul DE LA ZOCCOLIERE',
		'antoine François Paul De La Zoccoliere',
	);
	foreach($samples as $sample){
		$a = ExtractCustomerFirstAndLastNameFromString($sample, $firstname, $lastname);
		echo 'envoyé : '.$a.'<br>prénom : '.$firstname.'<br>nom : '.$lastname.'<br><br><br>'.chr(10);
	}
?>
PS : pour ajouter une pondération (par exemple si tous vos noms contiennent un *), il suffit d'ajouter une section du type :
					//in usage : if the word is entirely in upper case it's a name
					//(the word equals the upper-cased form of its lower-cased copy)
					if($name_part == $upper_after_lower){
						//one more "name" tag for the word at index $i
						$tagged['name']['nb']++;
						$tagged['name']['keys'][] = $i;
						//count the tag in the per-word total as well
						if(isset($tagged['common'][$i])){
							$tagged['common'][$i]++;
						}else{
							$tagged['common'][$i] = 1;
						}
					}
qui à votre sauce serait :
					//special case : every word containing an * is tagged as part of the name
					//strpos() takes the haystack first and the needle second; with the
					//arguments swapped the word was searched for inside '*'
					if(strpos($name_part, '*') !== false){
						//one more "name" tag for the word at index $i
						$tagged['name']['nb']++;
						$tagged['name']['keys'][] = $i;
						//count the tag in the per-word total as well
						if(isset($tagged['common'][$i])){
							$tagged['common'][$i]++;
						}else{
							$tagged['common'][$i] = 1;
						}
					}
Voilà !