Array ( [op] => ODER [left] => Array ( [op] => UND [left] => arg1 und so [right] => arg5 ) [right] => Array ( [op] => UND [left] => Array ( [op] => ODER [left] => arg2 [right] => arg6 ) [right] => arg3 ) )
<?php
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Copyright (c) 2008 Yannik Hampe *
* *
* Permission is hereby granted, free of charge, to any person *
* obtaining a copy of this software and associated documentation *
* files (the "Software"), to deal in the Software without *
* restriction, including without limitation the rights to use, *
* copy, modify, merge, publish, distribute, sublicense, and/or sell *
* copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following *
* conditions: *
* *
* The above copyright notice and this permission notice shall be *
* included in all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, *
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES *
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND *
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT *
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, *
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
* OTHER DEALINGS IN THE SOFTWARE. *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
 * Abstraction over the character stream that feeds the parser.
 *
 * The input can come from many sources (string, file, socket, ...).
 * Derive from this class to make your own data source available to the parser.
 */
abstract class SearchStringDataSource
{
    /**
     * Deliver the next character of the input.
     *
     * @return string|int the next character from the data source, or -1 once
     *                    the data source has reached its end
     */
    abstract public function next();
}
/**
 * SearchStringDataSource implementation that reads its characters from a string.
 */
class StringSSDataSource extends SearchStringDataSource
{
    /** The text that is handed out character by character. */
    private $string;

    /** Offset of the character returned most recently; -1 before the first read. */
    private $index;

    /**
     * @param string $s the text to read from
     */
    public function __construct($s)
    {
        $this->index = -1;
        $this->string = $s;
    }

    /**
     * Advance by one position and return the character found there.
     *
     * @return string|int the character at the new position, or -1 when the
     *                    position lies behind the end of the string
     */
    public function next()
    {
        $position = ++$this->index;
        if ($position < strlen($this->string)) {
            return $this->string[$position];
        }
        return -1;
    }
}
/**
 * Splits the character stream of a SearchStringDataSource into tokens.
 *
 * The current token is exposed through $curType / $curValue; nextToken()
 * advances to the following one. The constructor already reads the first token.
 */
class SearchStringTokenizer
{
    /**
     * List of available keywords (German "UND" = and, "ODER" = or) that map
     * to a unique identifier.
     * You might notice that this is the quick & dirty way. But it works.
     */
    public static $keywords =array('UND' =>1, 'ODER' =>2);

    /**
     * List of single-character operators. Same idea as with the keywords.
     */
    public static $operatoren =array('(' => 101, ')' => 102);

    /**
     * Every other token type, i.e. those that are not represented by their
     * own name in the source.
     */
    public static $other =array('string' => 200, 'eof' => 201);

    /** Data source the characters are pulled from. */
    private $dataSource;

    /** Look-ahead: the character read last but not consumed yet, or -1 at the end of the input. */
    private $peek;

    /** Type id of the current token (a value out of $keywords, $operatoren or $other). */
    public $curType;

    /** Text of the most recent string token. It is NOT reset when a non-string token is read. */
    public $curValue;

    /**
     * @param SearchStringDataSource $ds source of the characters to tokenize
     * @throws Exception if the input starts with a character that cannot begin a token
     */
    public function __construct(SearchStringDataSource $ds)
    {
        $this->dataSource =$ds;
        $this->next();      // prime the look-ahead character
        $this->nextToken(); // make the first token available right away
    }

    /**
     * Pull the next character from the data source into the look-ahead.
     */
    private function next()
    {
        $this->peek =$this->dataSource->next();
    }

    /**
     * Consume look-ahead characters for as long as trim() reduces them to an empty string.
     */
    private function skipWhite()
    {
        while ($this->peek !=-1 && trim($this->peek) =='') $this->next();
    }

    /**
     * Tell whether a character may be part of an unquoted word.
     *
     * @param string $char the character to test
     * @return bool true for ASCII letters and digits
     */
    private function isWordChar($char)
    {
        return preg_match('/[a-zA-Z0-9]/', $char) ==1;
    }

    /**
     * Read the next token and store it in $curType (and $curValue for string tokens).
     *
     * Whitespace in front of the token is skipped first, so input may start
     * with blanks and blank-only input simply yields the eof token.
     *
     * @throws Exception if the next character cannot begin any known token
     */
    public function nextToken()
    {
        $this->skipWhite();

        if ($this->peek ==-1)
        {
            $this->curType =self::$other['eof'];
        }
        elseif (isset(self::$operatoren[$this->peek]))
        {
            $this->curType =self::$operatoren[$this->peek];
            $this->next();
        }
        elseif ($this->isWordChar($this->peek))
        {
            $this->readWord();
        }
        elseif ($this->peek =='"')
        {
            $this->readQuoted();
        }
        else throw new Exception('Unknown Token: '.$this->peek);
    }

    /**
     * Read a run of word characters; it becomes a keyword token if it is
     * listed in $keywords and a string token otherwise.
     */
    private function readWord()
    {
        $word ='';
        do
        {
            $word .=$this->peek;
            $this->next();
        } while ($this->peek !=-1 && $this->isWordChar($this->peek));

        if (isset(self::$keywords[$word]))
        {
            $this->curType =self::$keywords[$word];
            return;
        }
        $this->curType =self::$other['string'];
        $this->curValue =$word;
    }

    /**
     * Read a double-quoted string token. A backslash makes the following
     * character part of the value literally (e.g. \" or \\). A string that is
     * still open at the end of the input is accepted as it stands.
     */
    private function readQuoted()
    {
        $this->next(); // skip the opening quote
        $text ='';
        while ($this->peek !='"' && $this->peek !=-1)
        {
            if ($this->peek =='\\')
            {
                $this->next(); // drop the backslash itself
                // A trailing backslash has nothing to escape; never append the -1 sentinel.
                if ($this->peek ==-1) break;
            }
            $text .=$this->peek; // appended exactly once, escaped or not
            $this->next();
        }
        $this->next(); // skip the closing quote
        $this->curType =self::$other['string'];
        $this->curValue =$text;
    }
}
/**
 * Recursive-descent parser that turns a search string into a nested array tree.
 * This is the class that you end up using, if you don't modify the source code.
 */
class SearchStringParser
{
    /** Tokenizer for the input that is currently being parsed. */
    private $tokenizer;

    /**
     * Parse the whole input of a data source. This is the only public method.
     *
     * @param SearchStringDataSource $ds the input to parse
     * @return array|string|null a plain string for a single operand, an array
     *         with the keys 'op' ('UND' or 'ODER'), 'left' and 'right' for an
     *         operation, or null if the input contains no operand at all
     * @throws Exception on an unbalanced brace, or when the tokenizer meets a
     *         character it does not know
     */
    public function parse(SearchStringDataSource $ds)
    {
        $this->tokenizer =new SearchStringTokenizer($ds);
        $tree =$this->parseLogic();
        // parseLogic() only stops in front of ")" or at the end of the input. A
        // leftover token is therefore a ")" without a matching "(" - report it
        // instead of silently ignoring the rest of the input.
        if ($this->tokenizer->curType !=SearchStringTokenizer::$other['eof'])
        {
            throw new Exception('Unexpected closing brace');
        }
        return $tree;
    }

    /**
     * Parse a chain of operands joined by UND / ODER.
     *
     * Both operators are handled alike: there is no precedence between them
     * and the chain is grouped from the left, so "a UND b ODER c" becomes
     * ((a UND b) ODER c).
     *
     * @return array|string|null the tree for the chain (see parse())
     */
    private function parseLogic()
    {
        $result =$this->parseIdent();
        while (true)
        {
            $type =$this->tokenizer->curType;
            if ($type ==SearchStringTokenizer::$keywords['UND']) $op ='UND';
            elseif ($type ==SearchStringTokenizer::$keywords['ODER']) $op ='ODER';
            else break;

            $this->tokenizer->nextToken(); // skip the operator
            $result =array('op' => $op, 'left' => $result, 'right' => $this->parseIdent());
        }
        return $result;
    }

    /**
     * Parse one operand: a string token or a sub-expression in braces.
     *
     * NOTE(review): directly consecutive operands such as "a b" are all
     * consumed and only the last one is returned; a missing operand yields
     * null. Confirm whether that is intended (an implicit UND or an exception
     * would be the alternatives).
     *
     * @return array|string|null the operand, or null if there is none
     * @throws Exception if an opening brace is not closed
     */
    private function parseIdent()
    {
        $result =null;
        while (true)
        {
            $type =$this->tokenizer->curType;
            if ($type ==SearchStringTokenizer::$operatoren['('])
            {
                $this->tokenizer->nextToken(); // skip (
                $result =$this->parseLogic();
                if ($this->tokenizer->curType !=SearchStringTokenizer::$operatoren[')']) throw new Exception("Closing brace expected");
                $this->tokenizer->nextToken(); // skip )
            }
            elseif ($type ==SearchStringTokenizer::$other['string'])
            {
                $result =$this->tokenizer->curValue;
                $this->tokenizer->nextToken();
            }
            else
            {
                return $result;
            }
        }
    }
}
/*
 * Sample code: parse a demo search string, dump the resulting tree and
 * then show the highlighted source of this file below a horizontal rule.
 */
$p = new SearchStringParser();
echo print_r(
    $p->parse(new StringSSDataSource('"arg1 und so" UND arg5 ODER ((arg2 ODER arg6) UND arg3)')),
    true
), '<hr />';
highlight_file(__FILE__);
?>