DslLexer.class.php (5845B)
<?php
namespace dsl;

/**
 * Lexer of the DSL: turns script source code into a flat list of DslToken objects.
 */
class DslLexer
{
    /**
     * Tokens collected by the current tokenize() run.
     *
     * @var DslToken[]
     */
    private $token = [];

    /**
     * Words that are mapped to a dedicated token type.
     * 'let', 'const' and 'var' all produce the same token type.
     */
    const KEYWORDS = [
        'function' => DslToken::T_FUNCTION,
        'for'      => DslToken::T_FOR,
        'if'       => DslToken::T_IF,
        'else'     => DslToken::T_ELSE,
        'let'      => DslToken::T_LET,
        'const'    => DslToken::T_LET,
        'var'      => DslToken::T_LET,
        'return'   => DslToken::T_RETURN,
        'new'      => DslToken::T_NEW,
        'throw'    => DslToken::T_THROW,
    ];

    /**
     * Reserved words that must not be used in a script.
     * Words that are also listed in KEYWORDS ('new', 'throw') are supported;
     * KEYWORDS takes precedence over this list.
     */
    const UNUSED_KEYWORDS = [
        'null',
        'true',
        'false',
        'implements',
        'interface',
        'package',
        'private',
        'protected',
        'public',
        'static',
        'in',
        'do',
        'new',
        'try',
        'this',
        'case',
        'void',
        'with',
        'enum',
        'while',
        'break',
        'catch',
        'throw',
        'yield',
        'class',
        'super',
        'typeof',
        'delete',
        'switch',
        'export',
        'import',
        'default',
        'finally',
        'extends',
        'continue',
        'debugger',
        'instanceof',
    ];

    /**
     * Splits the source code into tokens.
     *
     * The source is read byte by byte. Spaces, tabs and carriage returns are skipped,
     * line feeds advance the line counter, and "//" and "/*" comments are dropped.
     * Every call starts with an empty token list.
     *
     * @param string $code source code of the script
     * @return DslToken[] the tokens in source order
     * @throws DslParserException on an unclosed text literal, a reserved word or an unknown character
     */
    public function tokenize( $code ) {

        // Start with an empty list, otherwise a reused lexer would return the tokens of earlier runs too.
        $this->token = [];

        $code   = (string) $code;
        $length = strlen($code);
        $pos    = 0; // index of the next unread byte
        $line   = 1;

        // Character sets, used as masks for strpos()/strspn().
        $digits          = '0123456789';
        $identifierStart = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$';
        $identifierPart  = $identifierStart . $digits;
        $numberPart      = $digits . '._';
        $operatorChars   = '><+-/*=|&,.';

        while ( $pos < $length ) {
            $char = $code[$pos++];

            // Whitespace
            if ( $char === ' ' || $char === "\t" || $char === "\r" )
                continue;

            if ( $char === "\n" ) {
                $line++;
                continue;
            }

            // Text, enclosed in single or double quotes
            if ( $char === '"' || $char === "'" ) {
                $textEncloser = $char;
                $value        = '';

                while ( true ) {
                    // A text must be closed on the line it was opened on.
                    if ( $pos >= $length )
                        throw new DslParserException("Unclosed string",$line);

                    $char = $code[$pos++];

                    if ( $char === "\n" )
                        throw new DslParserException("Unclosed string",$line);

                    if ( $char === $textEncloser )
                        break;

                    if ( $char === '\\' ) {
                        if ( $pos >= $length )
                            throw new DslParserException("Unclosed string",$line);

                        // "\n" and "\t" are translated, every other escaped character is taken literally.
                        $escaped = $code[$pos++];
                        if     ( $escaped === 'n' )
                            $value .= "\n";
                        elseif ( $escaped === 't' )
                            $value .= "\t";
                        else
                            $value .= $escaped;
                        continue;
                    }

                    $value .= $char;
                }

                $this->addToken( $line, DslToken::T_TEXT, $value );
                continue;
            }

            // Comments
            if ( $char === '/' && $pos < $length ) {

                if ( $code[$pos] === '/' ) { // Comment after "//" runs to the end of the line
                    $pos++;
                    while ( $pos < $length ) {
                        if ( $code[$pos++] === "\n" ) {
                            $line++;
                            break;
                        }
                    }
                    continue;
                }

                if ( $code[$pos] === '*' ) { // Comment after "/*" runs to the next "*/"
                    $pos++;
                    $lastChar = null; // the "*" of the opening "/*" must not close the comment
                    while ( $pos < $length ) {
                        $c = $code[$pos++];
                        if ( $c === "\n" )
                            $line++;
                        if ( $lastChar === '*' && $c === '/' )
                            continue 2;
                        $lastChar = $c;
                    }
                    break; // end of input inside the comment: nothing left to read
                }

                // a single "/" is no comment, it is handled as an operator below.
            }

            // Names and keywords
            if ( strpos( $identifierStart, $char ) !== false ) {
                $start = $pos - 1;
                $pos   = $start + strspn( $code, $identifierPart, $start );
                $value = substr( $code, $start, $pos - $start );

                $this->addToken( $line, $this->getTypeOfWord( $value, $line ), $value );
                continue;
            }

            // Numbers, "_" is allowed as a separator and is removed from the value
            if ( strpos( $digits, $char ) !== false ) {
                $start = $pos - 1;
                $pos   = $start + strspn( $code, $numberPart, $start );
                $value = substr( $code, $start, $pos - $start );

                $this->addToken( $line, DslToken::T_NUMBER, str_replace( '_', '', $value ) );
                continue;
            }

            // Operators: all directly adjacent operator characters form one token.
            // "," and "." are operator characters, so they end up here too.
            if ( strpos( $operatorChars, $char ) !== false ) {
                $start = $pos - 1;
                $pos   = $start + strspn( $code, $operatorChars, $start );

                $this->addToken( $line, DslToken::T_OPERATOR, substr( $code, $start, $pos - $start ) );
                continue;
            }

            if     ( $char === '!' )
                $this->addToken( $line, DslToken::T_NEGATION, $char );
            elseif ( $char === ';' )
                $this->addToken( $line, DslToken::T_STATEMENT_END, $char );
            elseif ( $char === '(' ) {
                if ( $this->lastTokenHasType( DslToken::T_STRING ) )
                    // if string is followed by "(" it is a function or a function call
                    $this->addToken( $line, DslToken::T_OPERATOR, '$' ); // function call
                $this->addToken( $line, DslToken::T_BRACKET_OPEN, $char );
            }
            elseif ( $char === ')' ) {
                if ( $this->lastTokenHasType( DslToken::T_BRACKET_OPEN ) )
                    // if there is an empty parenthesis, make it contain something, otherwise the shunting yard algo will fail.
                    $this->addToken( $line, DslToken::T_NONE );
                $this->addToken( $line, DslToken::T_BRACKET_CLOSE, $char );
            }
            elseif ( $char === '{' )
                $this->addToken( $line, DslToken::T_BLOCK_BEGIN, $char );
            elseif ( $char === '}' )
                $this->addToken( $line, DslToken::T_BLOCK_END, $char );
            else {
                throw new DslParserException('Unknown character \''.$char.'\'',$line);
            }
        }

        return $this->token;
    }

    /**
     * Determines the token type of a word.
     *
     * @param string $word the word read from the source
     * @param int $line line number, used for the error message
     * @return mixed the keyword type from KEYWORDS, otherwise DslToken::T_STRING
     * @throws DslParserException if the word is reserved and not supported
     */
    private function getTypeOfWord( $word, int $line )
    {
        // Supported keywords first: 'new' and 'throw' are listed as reserved too.
        if ( array_key_exists( $word, self::KEYWORDS ) )
            return self::KEYWORDS[$word]; // it is a keyword

        // UNUSED_KEYWORDS is a list, so the word has to be searched in the values.
        if ( in_array( $word, self::UNUSED_KEYWORDS, true ) )
            throw new DslParserException( 'use of reserved word \''.$word.'\' is not allowed.', $line );

        return DslToken::T_STRING;
    }

    /**
     * Checks the type of the token that was added last.
     *
     * @param mixed $type token type to compare with
     * @return bool false if there is no token yet or if its type differs
     */
    private function lastTokenHasType( $type )
    {
        $last = end( $this->token );

        // end() returns false on an empty list.
        return $last !== false && $last->type == $type;
    }

    /**
     * Appends a new token to the token list.
     *
     * @param int $line line number of the token
     * @param mixed $type token type, one of the DslToken::T_* constants
     * @param mixed $value source text of the token
     */
    private function addToken(int $line, $type, $value=null)
    {
        $this->token[] = new DslToken( $line, $type, $value );
    }
}