Explication de script

Eléphanteau du PHP | 36 Messages

12 sept. 2006, 17:01

(nouveau post car nouveau code pour pas mélanger)

Salut, je viens de trouver un script qui permet de parser du XHTML,

mais les explications sont en anglais.
Serait-il possible de commenter et d'expliquer le script ?

Que faut-il paramétrer ou modifier pour parser la page www.machin.com ?

Code : Tout sélectionner

<?php
/**
 * HTML/XML Parser Class
 *
 * This is a helper class that is used to parse HTML and XML. A unique feature of this parsing class
 * is the fact that it includes support for innerHTML (which isn't easy to do).
 *
 * The input must be well-formed XML (e.g. XHTML): it is fed to PHP's XML
 * parser extension, which rejects unclosed or mismatched tags.
 *
 * @author Dennis Pallett
 * @copyright Dennis Pallett 2006
 * @package HTML_Parser
 * @version 1.0
 */

// Helper Class
// To parse HTML/XML
class HTML_Parser
{
    // Private properties
    var $_parser;                 // XML parser handle, only set while parse() is running
    var $_tags = array();         // currently open tag per nesting level (level => tag array)
    var $_html = '';              // markup regenerated from the parser events
    var $output = array();        // stack of open elements; holds the finished tree after parse()
    var $strXmlData;              // raw return value of xml_parse()
    var $_level = 0;              // current nesting depth (root element is level 1)
    var $_outline = '';           // like $_html, but open elements carry a level-number prefix
    var $_tagcount = array();     // not used by this class; kept for backward compatibility

    var $xml_error = false;
    var $xml_error_code;
    var $xml_error_string;
    var $xml_error_line_number;

    /**
     * Returns the markup rebuilt during the most recent parse() call.
     *
     * @return string
     */
    function get_html()
    {
        return $this->_html;
    }

    /**
     * Parses a well-formed XML/XHTML string into a nested array tree.
     *
     * Each element is an array with the keys "name" (upper-cased tag name),
     * "attr" (attributes, upper-cased names), "level" (nesting depth) and,
     * when applicable, "tagData" (direct text), "innerhtml" (inner markup)
     * and "children" (list of child elements).
     *
     * @param string $strInputXML markup to parse
     * @return array|false list containing the root element, or false on a
     *                     parse error (details are in the xml_error* properties)
     */
    function parse($strInputXML)
    {
        // Start from a clean slate so that one instance can parse several
        // documents without mixing their HTML, outline or error state.
        $this->output = array();
        $this->_tags = array();
        $this->_html = '';
        $this->_outline = '';
        $this->_level = 0;
        $this->xml_error = false;
        $this->xml_error_code = null;
        $this->xml_error_string = null;
        $this->xml_error_line_number = null;

        // Translate entities: the XML parser only knows the five XML
        // entities, so named HTML entities become numeric references.
        $strInputXML = $this->translate_entities($strInputXML);

        $this->_parser = xml_parser_create();
        xml_parser_set_option($this->_parser, XML_OPTION_CASE_FOLDING, true);

        // Array callables bind the handlers to this object without
        // xml_set_object(), which is deprecated in recent PHP versions.
        xml_set_element_handler(
            $this->_parser,
            array($this, 'tagOpen'),
            array($this, 'tagClosed')
        );
        xml_set_character_data_handler($this->_parser, array($this, 'tagData'));

        // The whole document is passed in one call, so flag it as the final
        // chunk; otherwise a truncated document is not reported as an error.
        $this->strXmlData = xml_parse($this->_parser, $strInputXML, true);

        if (!$this->strXmlData) {
            $code = xml_get_error_code($this->_parser);

            $this->xml_error = true;
            $this->xml_error_code = $code;
            $this->xml_error_string = xml_error_string($code);
            $this->xml_error_line_number = xml_get_current_line_number($this->_parser);
        }

        // Drop our reference so the parser (which references this object
        // through its handlers) can be released.
        $this->_parser = null;

        if ($this->xml_error) {
            return false;
        }

        return $this->output;
    }

    /**
     * Start-element handler: pushes the new element on the output stack and
     * appends its opening tag to the HTML and the outline.
     *
     * @param mixed  $parser parser handle (unused)
     * @param string $name   upper-cased tag name
     * @param array  $attr   attributes (upper-cased names => values)
     */
    function tagOpen($parser, $name, $attr)
    {
        // Increase level
        $this->_level++;

        // Create tag:
        $newtag = $this->create_tag($name, $attr);

        // Build tag
        $tag = array("name" => $name, "attr" => $attr, "level" => $this->_level);

        // Add tag
        array_push($this->output, $tag);

        // Add tag to this level
        $this->_tags[$this->_level] = $tag;

        // Add to HTML
        $this->_html .= $newtag;

        // Add to outline; the level prefix lets tagClosed() find the opening
        // tag that matches a given closing tag.
        $this->_outline .= $this->_level . $newtag;
    }

    /**
     * Builds the lower-cased opening tag for an element.
     *
     * "br" and "input" are written as self-closing tags ("<br />").
     *
     * @param string $name tag name
     * @param array  $attr attributes (name => value)
     * @return string
     */
    function create_tag($name, $attr)
    {
        $lowerName = strtolower($name);

        // Create tag:
        // Begin with name
        $tag = '<' . $lowerName . ' ';

        // Create attribute list
        foreach ($attr as $key => $val) {
            $tag .= strtolower($key) . '="' . htmlentities($val) . '" ';
        }

        // Finish tag
        $tag = trim($tag);

        switch ($lowerName) {
            case 'br':
            case 'input':
                $tag .= ' /';
                break;
        }

        $tag .= '>';

        return $tag;
    }

    /**
     * Character-data handler: stores non-blank text on the innermost open
     * element and appends the escaped text to the HTML and the outline.
     *
     * @param mixed  $parser  parser handle (unused)
     * @param string $tagData text chunk
     */
    function tagData($parser, $tagData)
    {
        // Compare against '' explicitly: a plain truthiness test would also
        // discard the text "0".
        if (trim($tagData) !== '') {
            $last = count($this->output) - 1;

            if (isset($this->output[$last]['tagData'])) {
                $this->output[$last]['tagData'] .= $tagData;
            } else {
                $this->output[$last]['tagData'] = $tagData;
            }
        }

        $escaped = htmlentities($tagData);
        $this->_html .= $escaped;
        $this->_outline .= $escaped;
    }

    /**
     * End-element handler: emits the closing tag, extracts the element's
     * innerHTML from the outline and attaches the element to its parent.
     *
     * @param mixed  $parser parser handle (unused)
     * @param string $name   upper-cased tag name
     */
    function tagClosed($parser, $name)
    {
        $closing = '</' . strtolower($name) . '>';

        // Add to HTML and outline (self-closing tags have no end tag)
        switch (strtolower($name)) {
            case 'br':
            case 'input':
                break;

            default:
                $this->_outline .= $this->_level . $closing;
                $this->_html .= $closing;
        }

        // Get tag that belongs to this end
        $tag = $this->_tags[$this->_level];
        $tag = $this->create_tag($tag['name'], $tag['attr']);

        // Try to get innerHTML: everything between the level-prefixed
        // opening tag and the level-prefixed closing tag.
        $regex = '%' . preg_quote($this->_level . $tag, '%')
            . '(.*?)'
            . preg_quote($this->_level . $closing, '%') . '%is';
        preg_match($regex, $this->_outline, $matches);

        // Remove level identifiers so that they do not leak into the
        // innerHTML of the enclosing elements.
        $this->_outline = str_replace($this->_level . $tag, $tag, $this->_outline);
        $this->_outline = str_replace($this->_level . $closing, $closing, $this->_outline);

        $last = count($this->output) - 1;

        // Add innerHTML
        if (isset($matches[1])) {
            $this->output[$last]['innerhtml'] = $matches[1];
        }

        // Fix tree: move the finished element under its parent. The root
        // element has no parent and simply stays in $this->output.
        if ($last > 0) {
            $this->output[$last - 1]['children'][] = $this->output[$last];
            array_pop($this->output);
        }

        // Decrease level
        $this->_level--;
    }

    /**
     * Converts named HTML entities to numeric references (or back).
     *
     * The entities for & " < > are left alone because XML defines them.
     *
     * @param string $xmlSource text to convert
     * @param bool   $reverse   when true, numeric references are turned back
     *                          into named entities
     * @return string
     */
    function translate_entities($xmlSource, $reverse = FALSE)
    {
        static $literal2NumericEntity;

        if (empty($literal2NumericEntity)) {
            // Request the table explicitly in UTF-8 so that the code point
            // can be decoded the same way on every PHP version.
            $transTbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8');

            foreach ($transTbl as $char => $entity) {
                if (strpos('&"<>', $char) !== FALSE) {
                    continue;
                }

                $literal2NumericEntity[$entity] = '&#' . $this->_utf8_codepoint($char) . ';';
            }
        }

        if ($reverse) {
            return strtr($xmlSource, array_flip($literal2NumericEntity));
        }

        return strtr($xmlSource, $literal2NumericEntity);
    }

    /**
     * Returns the Unicode code point of the first UTF-8 character in $char.
     *
     * ord() alone only sees the first byte, which is wrong for every
     * character outside ASCII.
     *
     * @param string $char non-empty UTF-8 string
     * @return int
     */
    function _utf8_codepoint($char)
    {
        $lead = ord($char[0]);

        if ($lead < 0x80) {
            return $lead;
        }

        if (($lead & 0xE0) === 0xC0) {
            $code = $lead & 0x1F;
            $extra = 1;
        } elseif (($lead & 0xF0) === 0xE0) {
            $code = $lead & 0x0F;
            $extra = 2;
        } else {
            $code = $lead & 0x07;
            $extra = 3;
        }

        for ($i = 1; $i <= $extra; $i++) {
            if (!isset($char[$i])) {
                // Truncated sequence: fall back to the raw lead byte.
                return $lead;
            }

            $code = ($code << 6) | (ord($char[$i]) & 0x3F);
        }

        return $code;
    }
}

// To be used like this: put the markup to parse in $html beforehand
// (for instance the downloaded source of the page to analyse).
if (isset($html)) {
    $parser = new HTML_Parser;
    $output = $parser->parse($html);
    print_r($output);
}
?>