DslLexer.class.php (6067B)
<?php
namespace dsl;

/**
 * Lexer of the DSL: turns source text into a flat list of DslToken objects.
 *
 * The lexer works byte-wise on the input string. It recognises string
 * literals, line and block comments, identifiers/keywords, integer numbers,
 * operator runs and a handful of single-character punctuation tokens.
 */
class DslLexer
{
	/**
	 * Tokens collected by the current tokenize() run.
	 */
	private $token = [];


	/**
	 * Words which are mapped to a dedicated token type.
	 */
	const KEYWORDS = [
		'function' => DslToken::T_FUNCTION,
		'for'      => DslToken::T_FOR,
		'if'       => DslToken::T_IF,
		'else'     => DslToken::T_ELSE,
		'let'      => DslToken::T_LET,
		'const'    => DslToken::T_LET,
		'var'      => DslToken::T_LET,
		'return'   => DslToken::T_RETURN,
		'new'      => DslToken::T_NEW,
		'throw'    => DslToken::T_THROW,
		'null'     => DslToken::T_NULL,
		'true'     => DslToken::T_TRUE,
		'false'    => DslToken::T_FALSE,
	];

	/**
	 * Reserved words which must not be used as identifiers.
	 *
	 * Note: 'new' and 'throw' are also listed in KEYWORDS. KEYWORDS wins,
	 * so these two are accepted and lexed as keywords.
	 */
	const UNUSED_KEYWORDS = [
		'implements',
		'interface',
		'package',
		'private',
		'protected',
		'public',
		'static',
		'in',
		'do',
		'new',
		'try',
		'this',
		'case',
		'void',
		'with',
		'enum',
		'while',
		'break',
		'catch',
		'throw',
		'yield',
		'class',
		'super',
		'typeof',
		'delete',
		'switch',
		'export',
		'import',
		'default',
		'finally',
		'extends',
		'continue',
		'debugger',
		'instanceof',
	];

	/**
	 * Characters which may form an operator. Consecutive operator characters
	 * are merged into one operator token. '.' and ',' are part of this set,
	 * so they are always emitted as T_OPERATOR.
	 */
	const OPERATOR_CHARS = ['>', '<', '+', '-', '/', '*', '=', '|', '&', ',', '.'];

	/**
	 * Splits the source code into tokens.
	 *
	 * Every call starts with an empty token list, so one lexer instance may
	 * be used for several inputs.
	 *
	 * @param string $code source code
	 * @return DslToken[] the tokens in source order
	 * @throws DslParserException on an unclosed string literal, a reserved word or an unknown character
	 */
	public function tokenize( $code ) {

		// Do not leak the tokens of a previous run into this result.
		$this->token = [];

		$code   = (string) $code;
		$length = strlen($code);
		$pos    = 0; // index of the next unread byte
		$line   = 1;

		// mb_str_split is only available since PHP 7.4, so the input is read byte-wise.
		// An index cursor is used instead of array_shift(), which reindexes the
		// whole array on every call.
		while ( $pos < $length ) {

			$char = $code[$pos++];

			// Insignificant whitespace
			if ( $char === ' ' || $char === "\t" || $char === "\r" )
				continue;

			if ( $char === "\n" ) {
				$line++;
				continue;
			}

			// Text
			if ( $char === '"' || $char === "'" ) {
				$textEncloser = $char;
				$startLine    = $line;
				$value        = '';

				while ( true ) {
					// End of input inside a literal: fail instead of looping forever.
					if ( $pos >= $length )
						throw new DslParserException("Unclosed string",$line);

					$char = $code[$pos++];

					if ( $char === "\n" )
						throw new DslParserException("Unclosed string",$line);

					if ( $char === '\\' ) {
						if ( $pos >= $length )
							throw new DslParserException("Unclosed string",$line);

						$escaped = $code[$pos++];
						if ( $escaped === 'n' )
							$value .= "\n"; // append, do not replace the text read so far
						elseif ( $escaped === 't' )
							$value .= "\t";
						else {
							// Any other escaped character (including the encloser) is taken literally.
							if ( $escaped === "\n" )
								$line++; // keep the line counter correct after an escaped line break
							$value .= $escaped;
						}
						continue;
					}

					if ( $char === $textEncloser )
						break;

					$value .= $char;
				}

				$this->addToken( $startLine, DslToken::T_TEXT, $value );
				continue;
			}

			// Comments
			if ( $char === '/' && $pos < $length ) {
				$nextChar = $code[$pos];

				if ( $nextChar === '/' ) { // Comment after "//"
					// Skip up to (not including) the line break; the main loop counts it.
					$eol = strpos($code, "\n", $pos);
					$pos = ( $eol === false ) ? $length : $eol;
					continue;
				}

				if ( $nextChar === '*' ) { // Comment after "/*"
					// The closing "*/" must start behind the opening "/*", so "/*/" does not close.
					$commentStart = $pos + 1;
					$commentEnd   = strpos($code, '*/', $commentStart);

					if ( $commentEnd === false )
						break; // unterminated comment: the rest of the input is ignored

					$line += substr_count( substr($code, $commentStart, $commentEnd - $commentStart), "\n" );
					$pos   = $commentEnd + 2;
					continue;
				}
				// otherwise this is no comment, '/' is handled as an operator below
			}

			// String (identifier or keyword)
			if ( self::isIdentifierStart($char) ) {
				$start = $pos - 1;
				while ( $pos < $length && self::isIdentifierPart($code[$pos]) )
					$pos++;
				$value = substr($code, $start, $pos - $start);

				if ( array_key_exists($value, self::KEYWORDS) )
					$type = self::KEYWORDS[$value]; // it is a keyword
				elseif ( in_array($value, self::UNUSED_KEYWORDS, true) )
					// UNUSED_KEYWORDS is a list, so the value (not the key) has to be searched.
					throw new DslParserException( 'use of reserved word \''.$value.'\' is not allowed.',$line);
				else
					$type = DslToken::T_STRING;

				$this->addToken( $line, $type, $value );
				continue;
			}

			// Numbers
			// TODO we have a problem with
			// - "-" is an operator, so we cannot parse negative numbers
			// - "." is the property char, so we cannot parse decimal values
			if ( self::isDigit($char) ) {
				$start = $pos - 1;
				while ( $pos < $length && ( self::isDigit($code[$pos]) || $code[$pos] === '_' ) )
					$pos++;

				// "_" is allowed as a visual separator and is removed from the value.
				$value = str_replace('_', '', substr($code, $start, $pos - $start));
				$this->addToken( $line, DslToken::T_NUMBER, $value );
				continue;
			}

			// Operators: consecutive operator characters form one token
			if ( in_array($char, self::OPERATOR_CHARS, true) ) {
				$start = $pos - 1;
				while ( $pos < $length && in_array($code[$pos], self::OPERATOR_CHARS, true) ) {
					// Do not swallow the start of a comment ("//" or "/*") into the operator.
					if ( $code[$pos] === '/' && $pos + 1 < $length &&
					     ( $code[$pos + 1] === '/' || $code[$pos + 1] === '*' ) )
						break;
					$pos++;
				}
				$this->addToken( $line, DslToken::T_OPERATOR, substr($code, $start, $pos - $start) );
				continue;
			}

			// Single character tokens
			switch ( $char ) {
				case '!':
					$this->addToken( $line, DslToken::T_NEGATION, $char );
					break;

				case ';':
					$this->addToken( $line, DslToken::T_STATEMENT_END, $char );
					break;

				case '(':
					$lastType = $this->lastTokenType();
					if ( $lastType !== null && $lastType == DslToken::T_STRING )
						// if string is followed by "(" it is a function or a function call
						$this->addToken( $line, DslToken::T_OPERATOR, '$' ); // function call
					$this->addToken( $line, DslToken::T_BRACKET_OPEN, $char );
					break;

				case ')':
					$lastType = $this->lastTokenType();
					if ( $lastType !== null && $lastType == DslToken::T_BRACKET_OPEN )
						// if there is an empty parenthesis, make it contain something, otherwise the shunting yard algo will fail.
						$this->addToken( $line, DslToken::T_NONE );
					$this->addToken( $line, DslToken::T_BRACKET_CLOSE, $char );
					break;

				case '{':
					$this->addToken( $line, DslToken::T_BLOCK_BEGIN, $char );
					break;

				case '}':
					$this->addToken( $line, DslToken::T_BLOCK_END, $char );
					break;

				default:
					throw new DslParserException('Unknown character \''.$char.'\'',$line);
			}
		}


		return $this->token;
	}

	/**
	 * Appends a new token to the token list.
	 *
	 * @param int $line source line of the token
	 * @param mixed $type one of the DslToken::T_* constants
	 * @param mixed $value token text, if any
	 */
	private function addToken(int $line, $type, $value=null)
	{
		$this->token[] = new DslToken( $line, $type, $value );
	}

	/**
	 * Type of the most recently added token.
	 *
	 * @return mixed the type, or null if no token was added yet
	 */
	private function lastTokenType()
	{
		$last = end( $this->token );

		return ( $last === false ) ? null : $last->type;
	}

	/**
	 * @param string $char a single byte
	 * @return bool true if the byte may start an identifier (a-z, A-Z, "_" or "$")
	 */
	private static function isIdentifierStart( $char )
	{
		return ( $char >= 'a' && $char <= 'z' ) ||
		       ( $char >= 'A' && $char <= 'Z' ) ||
		       $char === '_' ||
		       $char === '$';
	}

	/**
	 * @param string $char a single byte
	 * @return bool true if the byte may continue an identifier (identifier start or 0-9)
	 */
	private static function isIdentifierPart( $char )
	{
		return self::isIdentifierStart($char) || self::isDigit($char);
	}

	/**
	 * @param string $char a single byte
	 * @return bool true if the byte is an ASCII digit
	 */
	private static function isDigit( $char )
	{
		return $char >= '0' && $char <= '9';
	}


}