scriptbox

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

DslLexer.class.php (5845B)


      1 <?php
      2 namespace dsl;
      3 
      4 class DslLexer
      5 {
      6 	private $token = [];
      7 
      8 
      9 	const KEYWORDS = [
     10 		'function' => DslToken::T_FUNCTION,
     11 		'for'      => DslToken::T_FOR,
     12 		'if'       => DslToken::T_IF,
     13 		'else'     => DslToken::T_ELSE,
     14 		'let'      => DslToken::T_LET,
     15 		'const'    => DslToken::T_LET,
     16 		'var'      => DslToken::T_LET,
     17 		'return'   => DslToken::T_RETURN,
     18 		'new'      => DslToken::T_NEW,
     19 		'throw'    => DslToken::T_THROW,
     20 	];
     21 
     22 	const UNUSED_KEYWORDS = [
     23 		'null',
     24 		'true',
     25 		'false',
     26 		'implements',
     27 		'interface',
     28 		'package',
     29 		'private',
     30 		'protected',
     31 		'public',
     32 		'static',
     33 		'in',
     34 		'do',
     35 		'new',
     36 		'try',
     37 		'this',
     38 		'case',
     39 		'void',
     40 		'with',
     41 		'enum',
     42 		'while',
     43 		'break',
     44 		'catch',
     45 		'throw',
     46 		'yield',
     47 		'class',
     48 		'super',
     49 		'typeof',
     50 		'delete',
     51 		'switch',
     52 		'export',
     53 		'import',
     54 		'default',
     55 		'finally',
     56 		'extends',
     57 		'continue',
     58 		'debugger',
     59 		'instanceof',
     60 		];
     61 	/**
     62 	 * @param $code
     63 	 * @return array(DslToken)
     64 	 */
     65 	public function tokenize( $code ) {
     66 
     67 		//echo "Code: <pre>".$code."</pre>";
     68 
     69 		$line = 1;
     70 
     71 		// mb_str_split only available since PHP 7.4
     72 		$chars = str_split($code);
     73 
     74 		while( true ) {
     75 			$char = array_shift($chars);
     76 
     77 			if   ( $char == null )
     78 				break;
     79 
     80 			if   ( ( $char == ' ' ))
     81 				continue;
     82 
     83 			if   ( ( $char == "\n" )) {
     84 				$line++;
     85 				continue;
     86 			}
     87 
     88 			// Text
     89 			if   ( $char == '"' || $char == "'" ) {
     90 				$textEncloser = $char;
     91 				$value = '';
     92 				while( true ) {
     93 					$char = array_shift($chars);
     94 					if   ( $char == "\n")
     95 						throw new DslParserException("Unclosed string",$line);
     96 					if   ( $char == '\\') {
     97 						$char = array_shift($chars);
     98 						if   ( $char == 'n' )
     99 							$value = "\n";
    100 						elseif ( $char == 't' )
    101 							$value = "\t";
    102 						else
    103 							$value .= $char;
    104 					}
    105 					elseif ($char != $textEncloser) {
    106 						$value .= $char;
    107 						continue;
    108 					} else {
    109 						$this->addToken($line, DslToken::T_TEXT, $value);
    110 						break;
    111 					}
    112 				}
    113 				continue;
    114 			}
    115 
    116 			// Comments
    117 			if   ( $char == '/' ) {
    118 				$nextChar = array_shift($chars);
    119 				if   ( $nextChar == '/' ) { // Comment after "//"
    120 
    121 					while( true ) {
    122 						$c = array_shift($chars);
    123 						if ($c == "\n")
    124 							$line++;
    125 						if ($c == "\n" || $c == null )
    126 							continue 2;
    127 					}
    128 
    129 				}
    130 				elseif   ( $nextChar == '*' ) { // Comment after "/*"
    131 
    132 					$lastChar = null;
    133 					while( true ) {
    134 						$c = array_shift($chars);
    135 						if   ( $c == null )
    136 							break 2;
    137 						if ($c == "\n")
    138 							$line++;
    139 						if   ( $lastChar == '*' && $c == '/')
    140 							continue 2;
    141 						$lastChar = $c;
    142 						continue;
    143 					}
    144 
    145 				}
    146 				else {
    147 					array_unshift($chars,$nextChar); // this is no comment
    148 				}
    149 			}
    150 
    151 			// String
    152 			if   ( ( $char >= 'a' && $char <= 'z') ||
    153 				   ( $char >= 'A' && $char <= 'Z') ||
    154 				   $char == '_'                    ||
    155 				   $char == '$' ) {
    156 				$value = $char;
    157 				while( true ) {
    158 					$char = array_shift( $chars );
    159 					if   ( ( $char >= 'a' && $char <= 'z') ||
    160 						( $char >= 'A' && $char <= 'Z') ||
    161 						( $char >= '0' && $char <= '9') ||
    162 						$char == '_'                    ||
    163 						$char == '$' ) {
    164 						$value .= $char;
    165 					} else {
    166 						$type = DslToken::T_STRING;
    167 
    168 						if   ( array_key_exists($value,self::UNUSED_KEYWORDS ) )
    169 							throw new DslParserException( 'use of reserved word \''.$value.'\' is not allowed.');
    170 
    171 						if   ( array_key_exists($value,self::KEYWORDS ) )
    172 							$type = self::KEYWORDS[$value]; // it is a keyword
    173 
    174 						$this->addToken( $line,$type,$value );
    175 						array_unshift($chars,$char);
    176 						break;
    177 					}
    178 				}
    179 				continue;
    180 			}
    181 
    182 			// Numbers
    183 			if   ( $char >= '0' && $char <= '9' ) {
    184 				$value = $char;
    185 				while( true ) {
    186 					$char = array_shift( $chars );
    187 					if   ( ( $char >= '0' && $char <= '9') ||
    188 						$char == '.' || $char == '_' ) {
    189 						$value .= $char;
    190 					} else {
    191 						$this->addToken( $line,DslToken::T_NUMBER,str_replace('_','',$value ));
    192 						array_unshift($chars,$char);
    193 						break;
    194 					}
    195 				}
    196 				continue;
    197 			}
    198 
    199 			$operatorChars = ['>','<','+' ,'-','/' ,'*','=','|','&',',','.' ];
    200 			if   ( in_array($char,$operatorChars)) {
    201 
    202 				$value = $char;
    203 				while( true ) {
    204 					$char = array_shift( $chars );
    205 					if   ( in_array($char,$operatorChars) ) {
    206 						$value .= $char;
    207 					} else {
    208 						$type = DslToken::T_OPERATOR;
    209 						$this->addToken( $line,$type,$value );
    210 						array_unshift($chars,$char);
    211 						continue 2;
    212 					}
    213 				}
    214 				continue;
    215 			}
    216 
    217 			if   ( $char == "\r" )
    218 				continue;
    219 			elseif   ( $char == '!' )
    220 				$this->addToken( $line,DslToken::T_NEGATION,$char);
    221 			elseif   ( $char == ';' )
    222 				$this->addToken( $line,DslToken::T_STATEMENT_END,$char);
    223 			elseif   ( $char == '.' )
    224 				$this->addToken( $line,DslToken::T_DOT,$char);
    225 			elseif   ( $char == ',' )
    226 				$this->addToken( $line,DslToken::T_COMMA,$char);
    227 
    228 			elseif   ( $char == '(' ) {
    229 				if  ( end( $this->token)->type == DslToken::T_STRING)
    230 					// if string is followed by "(" it is a function or a function call
    231 					$this->addToken( $line, DslToken::T_OPERATOR,'$'); // function call
    232 				$this->addToken( $line,DslToken::T_BRACKET_OPEN,$char);
    233 			}
    234 			elseif   ( $char == ')' ) {
    235 				if (end($this->token)->type == DslToken::T_BRACKET_OPEN)
    236 					// if there is an empty parenthesis, make it contain something, otherwise the shunting yard algo will fail.
    237 					$this->addToken($line, DslToken::T_NONE ); //
    238 				$this->addToken($line, DslToken::T_BRACKET_CLOSE, $char);
    239 			}
    240 			elseif   ( $char == '{' )
    241 				$this->addToken( $line,DslToken::T_BLOCK_BEGIN,$char);
    242 			elseif   ( $char == '}' )
    243 				$this->addToken( $line,DslToken::T_BLOCK_END,$char);
    244 			else {
    245 				throw new DslParserException('Unknown character \''.$char.'\'',$line);
    246 			}
    247 		}
    248 
    249 
    250 		return $this->token;
    251 	}
    252 
    253 	private function addToken(int $line, $type, $value=null)
    254 	{
    255 		$this->token[] = new DslToken( $line, $type, $value );
    256 	}
    257 
    258 
    259 }