Du musst das im regulären Ausdruck klammern (runde Klammern), was du später referenzieren willst.
Früher oder später wirst du aber vermutlich einen echten Lexer/Parser benötigen.
Ich habe so etwas für CSS einmal angefangen. Das sollte dir eine Idee geben, wie ein einfacher Tokenizer aufgebaut sein kann:
PHP-Code:
<?php
/**
 * Minimal CSS tokenizer intended for syntax highlighting.
 *
 * The input is split into a flat list of array(type, text) pairs. The type is
 * one of 'default', 'comment', 'string', 'rule_start', 'rule_end' or 'error'.
 * Concatenating the text of all tokens reproduces the input.
 */
class CssTokenizer
{
    /**
     * Characters (UTF-8 code points) of the input currently being tokenized.
     *
     * @var string[]
     */
    protected $chars = array();

    /**
     * Tokens produced by the most recent tokenize() call.
     *
     * @var array[]
     */
    protected $tokens = array();

    /**
     * Get character at index.
     *
     * @param int $index
     * @return string|null The character, or null when the index is out of range.
     */
    protected function gc($index)
    {
        return isset($this->chars[$index]) ? $this->chars[$index] : null;
    }

    /**
     * Append a token to the result list, skipping tokens without text.
     *
     * @param string $type Token type, e.g. 'default' or 'comment'.
     * @param string $text Raw source text covered by the token.
     * @return void
     */
    protected function addToken($type, $text)
    {
        if ($text !== '') {
            $this->tokens[] = array($type, $text);
        }
    }

    /**
     * Split a stylesheet into tokens.
     *
     * Comments and strings are emitted as single tokens; braces, quotes and
     * comment markers inside them are not interpreted. Inside a string a
     * backslash escapes the following character, so an escaped quote does not
     * terminate the string. A '{' while already inside a rule set, or a '}'
     * outside of one, is emitted as an 'error' token. Text of a comment or
     * string that is still open at the end of the input is emitted as a
     * 'default' token.
     *
     * @param string $input CSS source, expected to be UTF-8.
     * @return array[] List of array(type, text) pairs in source order.
     */
    public function tokenize($input)
    {
        $input = (string) $input;

        // An empty pattern with PREG_SPLIT_NO_EMPTY yields exactly one element
        // per code point. A pattern relying on '$' would not split before a
        // trailing newline, because '$' also matches in front of it.
        $chars = preg_split('//u', $input, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // preg_split() fails on invalid UTF-8; tokenize byte-wise instead
            // of silently returning nothing.
            $chars = str_split($input);
        }

        $this->chars = $chars;
        $this->tokens = array();

        // Take the length from the array that is actually indexed below.
        $length = count($chars);
        $n = 0;
        $isInComment = false;
        $isInString = false;
        $isInRuleSet = false;
        $stringDelimiter = '';
        $buffer = '';

        while ($n < $length) {
            $c = $chars[$n];
            $next = $this->gc($n + 1);

            if ($isInComment) {
                // Only the closing marker is significant inside a comment.
                if ($c === '*' && $next === '/') {
                    $this->addToken('comment', $buffer . '*/');
                    $buffer = '';
                    $isInComment = false;
                    $n += 2;
                } else {
                    $buffer .= $c;
                    $n++;
                }
                continue;
            }

            if ($isInString) {
                if ($c === '\\' && $next !== null) {
                    // Keep the escape and the escaped character together so
                    // that an escaped delimiter does not close the string.
                    $buffer .= $c . $next;
                    $n += 2;
                } elseif ($c === $stringDelimiter) {
                    $this->addToken('string', $buffer . $c);
                    $buffer = '';
                    $isInString = false;
                    $stringDelimiter = '';
                    $n++;
                } else {
                    $buffer .= $c;
                    $n++;
                }
                continue;
            }

            if ($c === '/' && $next === '*') {
                // Comment start: flush pending text, then collect the comment.
                $this->addToken('default', $buffer);
                $buffer = '/*';
                $isInComment = true;
                $n += 2;
            } elseif ($c === '"' || $c === '\'') {
                // String start: remember which quote has to close it.
                $this->addToken('default', $buffer);
                $buffer = $c;
                $isInString = true;
                $stringDelimiter = $c;
                $n++;
            } elseif ($c === '{') {
                $this->addToken('default', $buffer);
                $buffer = '';
                // Rule sets cannot be nested; a second '{' is an error.
                $this->addToken($isInRuleSet ? 'error' : 'rule_start', $c);
                $isInRuleSet = true;
                $n++;
            } elseif ($c === '}') {
                $this->addToken('default', $buffer);
                $buffer = '';
                // A '}' without an open rule set is an error.
                $this->addToken($isInRuleSet ? 'rule_end' : 'error', $c);
                $isInRuleSet = false;
                $n++;
            } else {
                $buffer .= $c;
                $n++;
            }
        }

        // Whatever is left (including an unterminated comment or string).
        $this->addToken('default', $buffer);

        return $this->tokens;
    }
}
// Sample stylesheet exercising comments, both quote styles, quotes and braces
// inside comments, and unbalanced braces at the end.
$input = <<<EOT
/* Mein tolles Stylesheet */
#head{
width: 1014px;
height: 34px;
/* } */
background: #D3D3D3;
padding: 5px;
content: "Hello 'World!'";
border-style: solid;
border-width: 5px;
border-color: #262626;
}
/*
"Hello 'World!'"
'This "is" a test'
/*
test {
content: 'Hello "World!"';
}
*/
#navigation{
list-style-type: none;
display: inline;
vertical-align: bottom;
content: 'This "is" a test.';
}
/* Ein weiterer Kommentar */
#navigation li{
display: block;
position: relative;
float: left;
}
selector {
/* Rule set */
{
}}
EOT;

$c = new CssTokenizer();
$tokens = $c->tokenize($input);

// Escapes a value for use in HTML text and attribute context.
$e = function ($s) {
    return htmlspecialchars($s, ENT_QUOTES, 'UTF-8');
};

// Wrap every token in a <span> whose class is the token type, then join the
// pieces without separators so the original text layout is preserved.
$output = implode('', array_map(function ($token) use ($e) {
    return '<span class="' . $e($token[0]) . '">' . $e($token[1]) . '</span>';
}, $tokens));
?><!DOCTYPE html>
<!-- Page template: displays the highlighted token markup held in $output. -->
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>CSS Parser</title>
<style>
/*<![CDATA[*/
/* Frame around the highlighted source. */
pre {
border: 1px solid #ccc;
padding: 5px;
}
/* The selectors below match the token types used as span class names. */
.string {
background: #f0f0f0;
color: green;
}
.comment {
color: orange;
}
.rule_start, .rule_end {
color: red;
}
/* Braces the tokenizer flagged as misplaced. */
.error {
background: red;
color: white;
}
/*]]>*/
</style>
</head>
<body>
<!-- $output is built from individually HTML-escaped token parts, so it is echoed as-is. -->
<pre><code><?=$output?></code></pre>
</body>
</html>