Einzelnen Beitrag anzeigen
  #2 (permalink)  
Alt 07.09.2013, 22:58
mermshaus mermshaus ist offline
Erfahrener Benutzer
XHTMLforum-Mitglied
 
Registriert seit: 13.07.2006
Beiträge: 745
mermshaus ist ein wunderbarer Anblickmermshaus ist ein wunderbarer Anblickmermshaus ist ein wunderbarer Anblickmermshaus ist ein wunderbarer Anblickmermshaus ist ein wunderbarer Anblickmermshaus ist ein wunderbarer Anblick
Standard

Du musst den Teil des regulären Ausdrucks, den du später referenzieren willst, in runde Klammern setzen (eine sogenannte Capture-Gruppe).

Früher oder später wirst du aber vermutlich einen echten Lexer/Parser benötigen.

Ich habe so etwas für CSS einmal angefangen. Das sollte dir eine Idee geben, wie ein einfacher Tokenizer aussehen kann:

PHP-Code:
<?php

/**
 * Minimal CSS tokenizer intended for syntax highlighting.
 *
 * The input is split into a flat list of tokens. Every token is a two-element
 * array: array($type, $text). $type is one of 'default', 'comment', 'string',
 * 'rule_start', 'rule_end' or 'error'. Concatenating the text of all tokens
 * in order yields the original input again.
 *
 * Only one level of rule sets is recognised: a '{' inside an open rule set
 * and a '}' outside of one are reported as 'error' tokens.
 */
class CssTokenizer
{
    /**
     * Characters of the input currently being tokenized (one UTF-8
     * character per element).
     *
     * @var string[]
     */
    protected $chars;

    /**
     * Tokens collected so far.
     *
     * @var array[]
     */
    protected $tokens;

    /**
     * Get character at index
     *
     * @param int $index
     * @return string|null The character, or null if $index is out of range
     */
    protected function gc($index)
    {
        return isset($this->chars[$index]) ? $this->chars[$index] : null;
    }

    /**
     * Append a token to the token list, silently skipping empty text.
     *
     * @param string $type
     * @param string $text
     */
    protected function addToken($type, $text)
    {
        if ($text !== '') {
            $this->tokens[] = array($type, $text);
        }
    }

    /**
     * Split a stylesheet into tokens.
     *
     * @param string $input UTF-8 encoded CSS source
     * @return array[] List of array($type, $text) tokens
     * @throws InvalidArgumentException If $input is not valid UTF-8
     */
    public function tokenize($input)
    {
        // Split at every character boundary. An empty pattern plus
        // PREG_SPLIT_NO_EMPTY avoids the look-around variant's problem that
        // "$" also matches in front of a trailing newline, which would glue
        // the last two characters of the input together.
        $chars = preg_split('//u', (string) $input, -1, PREG_SPLIT_NO_EMPTY);

        if ($chars === false) {
            throw new InvalidArgumentException('Input is not valid UTF-8.');
        }

        $this->chars  = $chars;
        $this->tokens = array();

        // Take the length from the array that is actually indexed below so
        // the loop bound and the character list can never disagree.
        $length          = count($chars);
        $n               = 0;
        $isInComment     = false;
        $isInString      = false;
        $isInRuleSet     = false;
        $stringDelimiter = '';
        $buffer          = '';

        while ($n < $length) {
            $c = $chars[$n];

            // Inside a comment only the closing "*/" is significant.
            if ($isInComment) {
                if ($c === '*' && $this->gc($n + 1) === '/') {
                    $this->addToken('comment', $buffer . '*/');
                    $buffer      = '';
                    $isInComment = false;
                    $n += 2;
                } else {
                    $buffer .= $c;
                    $n++;
                }
                continue;
            }

            // Inside a string only the matching delimiter ends it; a
            // backslash escapes the following character (e.g. \" or \\).
            if ($isInString) {
                if ($c === '\\' && $this->gc($n + 1) !== null) {
                    $buffer .= $c . $this->gc($n + 1);
                    $n += 2;
                } elseif ($c === $stringDelimiter) {
                    $this->addToken('string', $buffer . $c);
                    $buffer          = '';
                    $isInString      = false;
                    $stringDelimiter = '';
                    $n++;
                } else {
                    $buffer .= $c;
                    $n++;
                }
                continue;
            }

            switch ($c) {
                case '/':
                    if ($this->gc($n + 1) === '*') {
                        // Flush pending text, then start the comment buffer
                        // with its opening marker.
                        $this->addToken('default', $buffer);
                        $buffer      = '/*';
                        $isInComment = true;
                        $n += 2;
                    } else {
                        $buffer .= $c;
                        $n++;
                    }
                    break;

                case '"':
                case '\'':
                    $this->addToken('default', $buffer);
                    $buffer          = $c;
                    $isInString      = true;
                    $stringDelimiter = $c;
                    $n++;
                    break;

                case '{':
                    $this->addToken('default', $buffer);
                    $buffer = '';
                    // A second "{" while a rule set is open is an error; the
                    // rule set stays open in that case.
                    $this->addToken($isInRuleSet ? 'error' : 'rule_start', $c);
                    $isInRuleSet = true;
                    $n++;
                    break;

                case '}':
                    $this->addToken('default', $buffer);
                    $buffer = '';
                    // A "}" without an open rule set is an error.
                    $this->addToken($isInRuleSet ? 'rule_end' : 'error', $c);
                    $isInRuleSet = false;
                    $n++;
                    break;

                default:
                    $buffer .= $c;
                    $n++;
                    break;
            }
        }

        // Flush whatever is left. An unterminated comment or string keeps
        // its own type so it is not mislabelled as plain text.
        if ($isInComment) {
            $this->addToken('comment', $buffer);
        } elseif ($isInString) {
            $this->addToken('string', $buffer);
        } else {
            $this->addToken('default', $buffer);
        }

        return $this->tokens;
    }
}

// Sample stylesheet used to exercise the tokenizer: comments containing
// braces and quotes, strings containing the other quote type, a nested
// comment opener, and deliberately unbalanced braces at the end.
// Nowdoc (quoted marker) is used so nothing in the CSS is ever interpolated.
$input = <<<'EOT'
/* Mein tolles Stylesheet */
#head{
    width: 1014px;
    height: 34px;
    /* } */
    background: #D3D3D3;
    padding: 5px;
    content: "Hello 'World!'";
    border-style: solid;
    border-width: 5px;
    border-color: #262626;
}

/*
    "Hello 'World!'"

    'This "is" a test'

    /*

    test {
        content: 'Hello "World!"';
    }
*/
#navigation{
    list-style-type: none;
    display: inline;
    vertical-align: bottom;
    content: 'This "is" a test.';
}

/* Ein weiterer Kommentar */
#navigation li{
    display: block;
    position: relative;
    float: left;
}

selector {
    /* Rule set */
    {
}}
EOT;

$c = new CssTokenizer();
$tokens = $c->tokenize($input);

// HTML-escape helper for both attribute values and text content.
$e = function ($s) {
    return htmlspecialchars($s, ENT_QUOTES, 'UTF-8');
};

// Render every token as a <span> whose class is the token type, so the
// stylesheet in the template below can colour it. $output is echoed there.
$output = '';

foreach ($tokens as $token) {
    list($class, $content) = $token;

    $output .= '<span class="' . $e($class) . '">' . $e($content) . '</span>';
}

?><!DOCTYPE html>
<!-- Demo page: displays the highlighted token markup that the PHP code above collected in $output. -->

<html lang="en">

    <head>
        <meta charset="UTF-8" />
        <title>CSS Parser</title>
        <style>
/*<![CDATA[*/
/* Frame around the highlighted listing. */
pre {
    border: 1px solid #ccc;
    padding: 5px;
}

/* The selectors below match the token types used as span class names. */
.string {
    background: #f0f0f0;
    color: green;
}

.comment {
    color: orange;
}

.rule_start, .rule_end {
    color: red;
}

/* Unbalanced braces reported by the tokenizer. */
.error {
    background: red;
    color: white;
}

/*]]>*/
        </style>
    </head>

    <body>

        <!-- $output is already HTML-escaped token by token, so it is echoed as-is. -->
        <pre><code><?=$output?></code></pre>

    </body>

</html>
Mit Zitat antworten
Sponsored Links