openrat-cms

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

DslLexer.class.php (6067B)


      1 <?php
      2 namespace dsl;
      3 
      4 class DslLexer
      5 {
      6 	private $token = [];
      7 
      8 
      9 	const KEYWORDS = [
     10 		'function' => DslToken::T_FUNCTION,
     11 		'for'      => DslToken::T_FOR,
     12 		'if'       => DslToken::T_IF,
     13 		'else'     => DslToken::T_ELSE,
     14 		'let'      => DslToken::T_LET,
     15 		'const'    => DslToken::T_LET,
     16 		'var'      => DslToken::T_LET,
     17 		'return'   => DslToken::T_RETURN,
     18 		'new'      => DslToken::T_NEW,
     19 		'throw'    => DslToken::T_THROW,
     20 		'null'     => DslToken::T_NULL,
     21 		'true'     => DslToken::T_TRUE,
     22 		'false'    => DslToken::T_FALSE,
     23 	];
     24 
     25 	const UNUSED_KEYWORDS = [
     26 		'implements',
     27 		'interface',
     28 		'package',
     29 		'private',
     30 		'protected',
     31 		'public',
     32 		'static',
     33 		'in',
     34 		'do',
     35 		'new',
     36 		'try',
     37 		'this',
     38 		'case',
     39 		'void',
     40 		'with',
     41 		'enum',
     42 		'while',
     43 		'break',
     44 		'catch',
     45 		'throw',
     46 		'yield',
     47 		'class',
     48 		'super',
     49 		'typeof',
     50 		'delete',
     51 		'switch',
     52 		'export',
     53 		'import',
     54 		'default',
     55 		'finally',
     56 		'extends',
     57 		'continue',
     58 		'debugger',
     59 		'instanceof',
     60 		];
     61 	/**
     62 	 * @param $code
     63 	 * @return array(DslToken)
     64 	 */
     65 	public function tokenize( $code ) {
     66 
     67 		//echo "Code: <pre>".$code."</pre>";
     68 
     69 		$line = 1;
     70 
     71 		// mb_str_split only available since PHP 7.4
     72 		$chars = str_split($code);
     73 
     74 		while( true ) {
     75 			$char = array_shift($chars);
     76 
     77 			if   ( $char == null )
     78 				break;
     79 
     80 			if   ( ( $char == ' ' ))
     81 				continue;
     82 
     83 			if   ( ( $char == "\n" )) {
     84 				$line++;
     85 				continue;
     86 			}
     87 
     88 			// Text
     89 			if   ( $char == '"' || $char == "'" ) {
     90 				$textEncloser = $char;
     91 				$value = '';
     92 				while( true ) {
     93 					$char = array_shift($chars);
     94 					if   ( $char == "\n")
     95 						throw new DslParserException("Unclosed string",$line);
     96 					if   ( $char == '\\') {
     97 						$char = array_shift($chars);
     98 						if   ( $char == 'n' )
     99 							$value = "\n";
    100 						elseif ( $char == 't' )
    101 							$value = "\t";
    102 						else
    103 							$value .= $char;
    104 					}
    105 					elseif ($char != $textEncloser) {
    106 						$value .= $char;
    107 						continue;
    108 					} else {
    109 						$this->addToken($line, DslToken::T_TEXT, $value);
    110 						break;
    111 					}
    112 				}
    113 				continue;
    114 			}
    115 
    116 			// Comments
    117 			if   ( $char == '/' ) {
    118 				$nextChar = array_shift($chars);
    119 				if   ( $nextChar == '/' ) { // Comment after "//"
    120 
    121 					while( true ) {
    122 						$c = array_shift($chars);
    123 						if ($c == "\n")
    124 							$line++;
    125 						if ($c == "\n" || $c == null )
    126 							continue 2;
    127 					}
    128 
    129 				}
    130 				elseif   ( $nextChar == '*' ) { // Comment after "/*"
    131 
    132 					$lastChar = null;
    133 					while( true ) {
    134 						$c = array_shift($chars);
    135 						if   ( $c == null )
    136 							break 2;
    137 						if ($c == "\n")
    138 							$line++;
    139 						if   ( $lastChar == '*' && $c == '/')
    140 							continue 2;
    141 						$lastChar = $c;
    142 						continue;
    143 					}
    144 
    145 				}
    146 				else {
    147 					array_unshift($chars,$nextChar); // this is no comment
    148 				}
    149 			}
    150 
    151 			// String
    152 			if   ( ( $char >= 'a' && $char <= 'z') ||
    153 				   ( $char >= 'A' && $char <= 'Z') ||
    154 				   $char == '_'                    ||
    155 				   $char == '$' ) {
    156 				$value = $char;
    157 				while( true ) {
    158 					$char = array_shift( $chars );
    159 					if   ( ( $char >= 'a' && $char <= 'z') ||
    160 						( $char >= 'A' && $char <= 'Z') ||
    161 						( $char >= '0' && $char <= '9') ||
    162 						$char == '_'                    ||
    163 						$char == '$' ) {
    164 						$value .= $char;
    165 					} else {
    166 						$type = DslToken::T_STRING;
    167 
    168 						if   ( array_key_exists($value,self::UNUSED_KEYWORDS ) )
    169 							throw new DslParserException( 'use of reserved word \''.$value.'\' is not allowed.');
    170 
    171 						if   ( array_key_exists($value,self::KEYWORDS ) )
    172 							$type = self::KEYWORDS[$value]; // it is a keyword
    173 
    174 						$this->addToken( $line,$type,$value );
    175 						array_unshift($chars,$char);
    176 						break;
    177 					}
    178 				}
    179 				continue;
    180 			}
    181 
    182 			// Numbers
    183 			// TODO we have a problem with
    184 			// - "-" is an operator, so we cannot parse negative numbers
    185 			// - "." is the property char, so we cannot parse decimal values
    186 			if   ( $char >= '0' && $char <= '9' ) {
    187 				$value = $char;
    188 				while( true ) {
    189 					$char = array_shift( $chars );
    190 					if   ( ( $char >= '0' && $char <= '9') ||
    191 						$char == '_' ) {
    192 						$value .= $char;
    193 					} else {
    194 						$this->addToken( $line,DslToken::T_NUMBER,str_replace('_','',$value ));
    195 						array_unshift($chars,$char);
    196 						break;
    197 					}
    198 				}
    199 				continue;
    200 			}
    201 
    202 			$operatorChars = ['>','<','+' ,'-','/' ,'*','=','|','&',',','.' ];
    203 			if   ( in_array($char,$operatorChars)) {
    204 
    205 				$value = $char;
    206 				while( true ) {
    207 					$char = array_shift( $chars );
    208 					if   ( in_array($char,$operatorChars) ) {
    209 						$value .= $char;
    210 					} else {
    211 						$type = DslToken::T_OPERATOR;
    212 						$this->addToken( $line,$type,$value );
    213 						array_unshift($chars,$char);
    214 						continue 2;
    215 					}
    216 				}
    217 				continue;
    218 			}
    219 
    220 			if   ( $char == "\r" )
    221 				continue;
    222 			elseif   ( $char == '!' )
    223 				$this->addToken( $line,DslToken::T_NEGATION,$char);
    224 			elseif   ( $char == ';' )
    225 				$this->addToken( $line,DslToken::T_STATEMENT_END,$char);
    226 			elseif   ( $char == '.' )
    227 				$this->addToken( $line,DslToken::T_DOT,$char);
    228 			elseif   ( $char == ',' )
    229 				$this->addToken( $line,DslToken::T_COMMA,$char);
    230 
    231 			elseif   ( $char == '(' ) {
    232 				if  ( end( $this->token)->type == DslToken::T_STRING)
    233 					// if string is followed by "(" it is a function or a function call
    234 					$this->addToken( $line, DslToken::T_OPERATOR,'$'); // function call
    235 				$this->addToken( $line,DslToken::T_BRACKET_OPEN,$char);
    236 			}
    237 			elseif   ( $char == ')' ) {
    238 				if (end($this->token)->type == DslToken::T_BRACKET_OPEN)
    239 					// if there is an empty parenthesis, make it contain something, otherwise the shunting yard algo will fail.
    240 					$this->addToken($line, DslToken::T_NONE ); //
    241 				$this->addToken($line, DslToken::T_BRACKET_CLOSE, $char);
    242 			}
    243 			elseif   ( $char == '{' )
    244 				$this->addToken( $line,DslToken::T_BLOCK_BEGIN,$char);
    245 			elseif   ( $char == '}' )
    246 				$this->addToken( $line,DslToken::T_BLOCK_END,$char);
    247 			else {
    248 				throw new DslParserException('Unknown character \''.$char.'\'',$line);
    249 			}
    250 		}
    251 
    252 
    253 		return $this->token;
    254 	}
    255 
    256 	private function addToken(int $line, $type, $value=null)
    257 	{
    258 		$this->token[] = new DslToken( $line, $type, $value );
    259 	}
    260 
    261 
    262 }