HEX
Server: Apache
System: Linux pdx1-shared-a1-38 6.6.104-grsec-jammy+ #3 SMP Tue Sep 16 00:28:11 UTC 2025 x86_64
User: mmickelson (3396398)
PHP: 8.1.31
Disabled: NONE
Upload Files
File: /home/mmickelson/theflexguy.com/wp-content/themes/vanilla/PHPTAL/Dom/XmlParser.php
<?php
/* vim: set expandtab tabstop=4 shiftwidth=4: */
//  
//  Copyright (c) 2004-2005 Laurent Bedubourg
//  
//  This library is free software; you can redistribute it and/or
//  modify it under the terms of the GNU Lesser General Public
//  License as published by the Free Software Foundation; either
//  version 2.1 of the License, or (at your option) any later version.
//  
//  This library is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//  Lesser General Public License for more details.
//  
//  You should have received a copy of the GNU Lesser General Public
//  License along with this library; if not, write to the Free Software
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//  
//  Authors: Laurent Bedubourg <lbedubourg@motion-twin.com>
//  

/**
 * Simple SAX-like XML parser for PHPTAL.
 *
 * Because PHP's XML parser libraries tend to fail at giving a faithful
 * representation of an XML document (at the time this file was created it
 * was impossible to retrieve doctypes and XML declarations, and there were
 * problems with comments and CDATA), this parser was created. It can also be
 * made to tolerate some user errors, such as unescaped < characters in
 * attribute values or inside text nodes.
 *
 * @package phptal
 * @author Laurent Bedubourg <lbedubourg@motion-twin.com>
 * @see PHPTAL_Dom_Parser
 */
abstract class PHPTAL_XmlParser
{
    // available parser states (values are arbitrary but must stay unique;
    // they are also the keys of self::$state_names)
    const ST_ROOT = 0;
    const ST_TEXT = 1;
    const ST_LT   = 2;
    const ST_TAG_NAME = 3;
    const ST_TAG_CLOSE = 4;
    const ST_TAG_SINGLE = 5;
    const ST_TAG_ATTRIBUTES = 6;
    const ST_CDATA = 7;
    const ST_COMMENT = 8;
    const ST_DOCTYPE = 9;
    const ST_XMLDEC = 15;
    const ST_PREPROC = 10;
    const ST_ATTR_KEY = 11;
    const ST_ATTR_EQ = 12;
    const ST_ATTR_QUOTE = 13;
    const ST_ATTR_VALUE = 14;

    // exceptions error messages
    const ERR_CHARS_BEFORE_DOC_START = 
        "Characters found before the begining of the document!";
    const ERR_EXPECT_VALUE_QUOTE =
        "Unexpected '%s' character, expecting attribute single or double quote";

    // UTF-8 byte order mark, stripped from the start of the source
    const BOM_STR = "\xef\xbb\xbf";

    /**
     * Human readable name of every parser state, used in the
     * "Finished document in unexpected state" error message.
     */
    public static $state_names = array(
      self::ST_ROOT => 'root node',
      self::ST_TEXT => 'text',
      self::ST_LT   => 'start of tag',
      self::ST_TAG_NAME => 'tag name',
      self::ST_TAG_CLOSE => 'closing tag',
      self::ST_TAG_SINGLE => 'self-closing tag',
      self::ST_TAG_ATTRIBUTES => 'tag',
      self::ST_CDATA => 'CDATA',
      self::ST_COMMENT => 'comment',
      self::ST_DOCTYPE => 'doctype',
      self::ST_XMLDEC => 'XML declaration',
      self::ST_PREPROC => 'preprocessor directive',
      self::ST_ATTR_KEY => 'attribute name',
      self::ST_ATTR_EQ => 'attribute value',
      self::ST_ATTR_QUOTE => 'quoted attribute value',
      self::ST_ATTR_VALUE => 'unquoted attribute value',
    );

    /** Name of the source being parsed ("<string>" when parsing a string). */
    private $_file;

    /** Current line number (1-based), updated while parsing. */
    private $_line;

    public function __construct() 
    {
        $this->_file = "<string>";
    }

    /**
     * Reads a file and parses its content.
     *
     * @param string $src path of the file to parse
     * @throws PHPTAL_Exception if the file does not exist, is a directory,
     *                          cannot be read, or is not well formed
     */
    public function parseFile($src) 
    {
        if (!file_exists($src)) {
            throw new PHPTAL_Exception("file $src not found");
        }
        // file_exists() is also true for directories, which cannot be parsed
        if (is_dir($src)) {
            throw new PHPTAL_Exception("file $src is a directory");
        }
        $content = file_get_contents($src);
        if ($content === false) {
            // do not feed `false` to the parser: report the real problem
            throw new PHPTAL_Exception("unable to read file $src");
        }
        $this->parseString($content, $src);
    }

    /**
     * Parses a document and reports what is found through the on*() callbacks.
     *
     * The source is scanned byte by byte by a state machine; $mark always
     * holds the offset where the construct currently being read started.
     *
     * @param string $src      document source
     * @param string $filename name reported by getSourceFile() and in errors
     * @throws PHPTAL_Exception on malformed input or when the document ends
     *                          in the middle of a construct
     */
    public function parseString($src, $filename = '<string>') 
    {        
        $this->_file = $filename;

        // remove BOM (utf8 byte order mark)... 
        if (substr($src, 0, 3) === self::BOM_STR) {
            $src = substr($src, 3);
        }

        $this->_line = 1;
        $state = self::ST_ROOT;
        $mark  = 0;
        $len   = strlen($src);

        $quoteStyle = '"';
        $tagname    = "";
        $attribute  = "";
        $attributes = array();

        // true while inside the [...] internal subset of a doctype
        $customDoctype = false;

        $this->onDocumentStart();
        for ($i=0; $i<$len; $i++) {        
            $c = $src[$i];

            if ($c === "\n") $this->_line++;

            switch ($state) {
                case self::ST_ROOT:
                    if ($c === '<') {
                        $mark = $i; // mark tag start
                        $state = self::ST_LT;
                    }
                    else if (!self::isWhiteChar($c)) {
                        $this->raiseError(self::ERR_CHARS_BEFORE_DOC_START);
                    }
                    break;

                case self::ST_TEXT:
                    if ($c === '<') {
                        if ($mark != $i) {
                            $this->onElementData(substr($src, $mark, $i-$mark));
                        }
                        $mark = $i;
                        $state = self::ST_LT;
                    }
                    break;

                case self::ST_LT:
                    if ($c === '/') {
                        $mark = $i+1;
                        $state = self::ST_TAG_CLOSE;
                    }
                    else if ($c === '?' && substr($src, $i, 4) === '?xml'
                             && $i+4 < $len && self::isWhiteChar($src[$i+4])) {
                        // only a target of exactly "xml" is an XML declaration;
                        // "xml-stylesheet" and friends are plain processing
                        // instructions handled by the next branch
                        $state = self::ST_XMLDEC;
                    }
                    else if ($c === '?') {
                        $state = self::ST_PREPROC;
                    }
                    else if ($c === '!' && substr($src, $i, 3) === '!--') {
                        $state = self::ST_COMMENT;
                    }
                    else if ($c === '!' && substr($src, $i, 8) === '![CDATA[') {
                        $state = self::ST_CDATA;
                    }
                    else if ($c === '!' && substr($src, $i, 8) === '!DOCTYPE') {
                        $state = self::ST_DOCTYPE;
                    }
                    else if (!self::isAlpha($c)) {
                        // not a tag after all: $mark still points at the '<',
                        // so it ends up in the text node
                        $state = self::ST_TEXT;
                    }
                    else {
                        $mark = $i; // mark node name start
                        $attributes = array();
                        $attribute = "";
                        $state = self::ST_TAG_NAME;
                    }
                    break;

                case self::ST_TAG_NAME:
                    if (self::isWhiteChar($c)) {
                        $tagname = substr($src, $mark, $i-$mark);
                        $state = self::ST_TAG_ATTRIBUTES;
                    }
                    else if ($c === '/') {
                        $tagname = substr($src, $mark, $i-$mark);
                        $state = self::ST_TAG_SINGLE;
                    }
                    else if ($c === '>') {
                        $tagname = substr($src, $mark, $i-$mark);
                        $mark = $i+1; // mark text start
                        $state = self::ST_TEXT;
                        $this->onElementStart($tagname, $attributes);
                    }
                    break;

                case self::ST_TAG_CLOSE:
                    if ($c === '>') {
                        $tagname = rtrim(substr($src, $mark, $i-$mark));
                        $this->onElementClose($tagname);
                        $mark = $i+1; // mark text start
                        $state = self::ST_TEXT;
                    }
                    break;

                case self::ST_TAG_SINGLE:
                    if ($c !== '>') {
                        // anything but '>' after the '/' used to be silently
                        // dropped from the document
                        $this->raiseError("Expected '/>', but found '/$c' inside tag '$tagname'");
                    }
                    $mark = $i+1;   // mark text start
                    $state = self::ST_TEXT;
                    $this->onElementStart($tagname, $attributes);
                    $this->onElementClose($tagname);
                    break;

                case self::ST_TAG_ATTRIBUTES:
                    if ($c === '>') {
                        $mark = $i+1;   // mark text start
                        $state = self::ST_TEXT;
                        $this->onElementStart($tagname, $attributes);
                    }
                    else if ($c === '/') {
                        $state = self::ST_TAG_SINGLE;
                    }
                    else if (self::isWhiteChar($c)) {
                        // skip white space between attributes
                    }
                    else {
                        $mark = $i; // mark attribute key start
                        $state = self::ST_ATTR_KEY;
                    }
                    break;

                case self::ST_COMMENT:
                    if ($c === '>' && substr($src, $i-2, 2) === '--') {
                        $this->onComment(substr($src, $mark, $i-$mark+1));
                        $mark = $i+1; // mark text start
                        $state = self::ST_TEXT;
                    }
                    break;

                case self::ST_CDATA:
                    if ($c === '>' && substr($src, $i-2, 2) === ']]') {
                        $this->onSpecific(substr($src, $mark, $i-$mark+1));
                        $mark = $i+1; // mark text start
                        $state = self::ST_TEXT;
                    }
                    break;

                case self::ST_XMLDEC:
                    if ($c === '?' && substr($src, $i, 2) === '?>') {
                        $this->onXmlDecl(substr($src, $mark, $i-$mark+2));
                        $i++; // skip '>'
                        $mark = $i+1; // mark text start
                        $state = self::ST_TEXT;
                    }
                    break;

                case self::ST_DOCTYPE:
                    if ($c === '[') {
                        $customDoctype = true;
                    }
                    else if ($customDoctype && $c === '>' && substr($src, $i-1, 2) === ']>') {
                        $customDoctype = false;
                        $this->onDocType(substr($src, $mark, $i-$mark+1));
                        $mark = $i+1; // mark text start
                        $state = self::ST_TEXT;
                    }
                    else if (!$customDoctype && $c === '>') {
                        $customDoctype = false;
                        $this->onDocType(substr($src, $mark, $i-$mark+1));
                        $mark = $i+1; // mark text start
                        $state = self::ST_TEXT;
                    }
                    break;

                case self::ST_PREPROC:
                    if ($c === '>' && $src[$i-1] === '?') {
                        $this->onSpecific(substr($src, $mark, $i-$mark+1));
                        $mark = $i+1; // mark text start
                        $state = self::ST_TEXT;
                    }
                    break;

                case self::ST_ATTR_KEY:
                    if (self::isWhiteChar($c)) {
                        $attribute = substr($src, $mark, $i-$mark);
                        $state = self::ST_ATTR_EQ;
                    }
                    else if ($c === '=') {
                        $attribute = substr($src, $mark, $i-$mark);
                        $state = self::ST_ATTR_VALUE;
                    }
                    else if ($c === '/' || $c === '>') {
                        // the tag ends before any '=': without this check the
                        // rest of the document was swallowed into the name
                        $attribute = substr($src, $mark, $i-$mark);
                        $this->raiseError("Attribute '$attribute' on '$tagname' does not have a value (found end of tag instead of '=')");
                    }
                    break;

                case self::ST_ATTR_EQ:
                    if ($c === '=') {
                        $state = self::ST_ATTR_VALUE;
                    }
                    else if (!self::isWhiteChar($c)) {
                        $this->raiseError("Attribute '$attribute' on '$tagname' does not have a value (found character '$c' instead of '=')");
                    }
                    break;

                case self::ST_ATTR_VALUE:
                    if (self::isWhiteChar($c)) {
                        // skip white space between '=' and the opening quote
                    }
                    else if ($c === '"' || $c === '\'') {
                        $quoteStyle = $c;
                        $state = self::ST_ATTR_QUOTE;
                        $mark = $i+1; // mark attribute real value start
                    }
                    else {
                        $this->raiseError(sprintf(self::ERR_EXPECT_VALUE_QUOTE, $c));
                    }
                    break;

                case self::ST_ATTR_QUOTE:
                    if ($c === $quoteStyle) {
                        if (isset($attributes[$attribute])) $this->raiseError("Attribute '$attribute' on '$tagname' is defined more than once");
                        $attributes[$attribute] = substr($src, $mark, $i-$mark);
                        $state = self::ST_TAG_ATTRIBUTES;
                    }
                    break;
            }
        }

        if ($state === self::ST_TEXT) {
            // only white space may follow the last tag of the document
            if ($i > $mark) {
                $text = substr($src, $mark, $i-$mark);
                if (!ctype_space($text)) $this->raiseError("Characters found after end of the root element");
            }
        }
        else {
            throw new PHPTAL_Exception("Finished document in unexpected state: ".self::$state_names[$state]." is not finished");
        }

        $this->onDocumentEnd();
    }

    /**
     * @return string name of the source given to the last parse call,
     *                "<string>" by default
     */
    public function getSourceFile()
    {
        return $this->_file;
    }

    /**
     * @return int line reached by the parser (1-based), null before any parse
     */
    public function getLineNumber()
    {
        return $this->_line;
    }

    /**
     * Tells whether $c is one of: space, tab, line feed, carriage return, NUL.
     *
     * @param string $c a single character
     * @return bool
     */
    public static function isWhiteChar($c)
    {
        return strpos(" \t\n\r\0", $c) !== false;
    }

    /**
     * Tells whether $c is an ASCII letter (a-z or A-Z).
     *
     * Plain range checks are used instead of strtolower(), whose result
     * depends on the current locale in older PHP versions.
     *
     * @param string $c a single character
     * @return bool
     */
    public static function isAlpha($c)
    {
        return ($c >= 'a' && $c <= 'z') || ($c >= 'A' && $c <= 'Z');
    }

    public abstract function onDocType($doctype);
    public abstract function onXmlDecl($decl);
    public abstract function onSpecific($data);
    public abstract function onComment($data);
    public abstract function onElementStart($name, $attributes);
    public abstract function onElementClose($name);
    public abstract function onElementData($data);
    public abstract function onDocumentStart();
    public abstract function onDocumentEnd();

    /**
     * Throws a PHPTAL_Exception carrying the parser class, the message and
     * the current file and line.
     *
     * When extra arguments are given, $errFmt is a sprintf() format and they
     * are its values. When called with the message alone it is used verbatim,
     * so a message that embeds source text containing '%' is reported as is
     * instead of being parsed as a format string.
     *
     * @param string $errFmt message, or sprintf() format if more arguments follow
     * @throws PHPTAL_Exception always
     */
    protected function raiseError($errFmt)
    {
        $args = func_get_args();
        if (count($args) > 1) {
            $errStr = vsprintf($errFmt, array_slice($args, 1));
        }
        else {
            $errStr = $errFmt;
        }

        $str = "%s error: %s in %s:%d";
        $str = sprintf($str, get_class($this), $errStr, $this->_file, $this->_line);
        throw new PHPTAL_Exception($str);
    }
}


?>