File modules/dsl/DslLexer.class.php

Last commit: Tue Jul 19 00:10:00 2022 +0200	Jan Dankert	New: Scripting language: Ignore Keyword "new"; Support for Calling object constructors; Splitting objects into an instance and a wrapper.
<?php
namespace dsl;

/**
 * Lexer of the JavaScript-like DSL.
 *
 * Turns source text into a flat list of DslToken objects. The input is read
 * byte by byte, bytes outside of string literals must be ASCII.
 */
class DslLexer
{
	/**
	 * Tokens collected by the current tokenize() run.
	 */
	private $token = [];

	/**
	 * Source text of the current tokenize() run.
	 */
	private $code = '';

	/**
	 * Length of the source text in bytes.
	 */
	private $length = 0;

	/**
	 * Offset of the next unread byte in the source text.
	 */
	private $pos = 0;

	/**
	 * Current line number (1-based), stored in every token.
	 */
	private $line = 1;


	const KEYWORDS = [
		'function' => DslToken::T_FUNCTION,
		'for'      => DslToken::T_FOR,
		'if'       => DslToken::T_IF,
		'else'     => DslToken::T_ELSE,
		'let'      => DslToken::T_LET,
		'const'    => DslToken::T_LET,
		'var'      => DslToken::T_LET,
		'return'   => DslToken::T_RETURN,
		'new'      => DslToken::T_NEW,
		'throw'    => DslToken::T_THROW,
		'null'     => DslToken::T_NULL,
		'true'     => DslToken::T_TRUE,
		'false'    => DslToken::T_FALSE,
	];

	// These words are silently dropped. They take precedence over KEYWORDS.
	const IGNORED_KEYWORDS = [
		'let',
		'new',
	];

	// Reserved words: using one of them raises a DslParserException.
	const UNUSED_KEYWORDS = [
		'implements',
		'interface',
		'package',
		'private',
		'protected',
		'public',
		'static',
		'in',
		'do',
		'try',
		'catch',
		'finally',
		'this',
		'case',
		'void',
		'with',
		'enum',
		'while',
		'break',
		'yield',
		'class',
		'super',
		'typeof',
		'delete',
		'switch',
		'export',
		'import',
		'default',
		'extends',
		'continue',
		'debugger',
		'instanceof',
		'goto', // ;)
	];

	// Every run of these characters becomes a single T_OPERATOR token.
	// '.' and ',' are part of this list, so they are always emitted as
	// operators and never as separate dot or comma tokens.
	const OPERATOR_CHARS = ['>','<','+','-','/','*','=','|','&',',','.'];

	/**
	 * Splits DSL source code into tokens.
	 *
	 * Every call starts with an empty token list, so one lexer instance may
	 * be used for several sources.
	 *
	 * @param string $code source code
	 * @return DslToken[] the tokens in source order
	 * @throws DslParserException on an unclosed string, a reserved word or an unknown character
	 */
	public function tokenize( $code ) {

		$this->token  = []; // do not return the tokens of an earlier run again
		$this->code   = (string) $code;
		$this->length = strlen( $this->code );
		$this->pos    = 0;
		$this->line   = 1;

		while ( ( $char = $this->nextChar() ) !== null ) {

			// Whitespace
			if ( $char === ' ' || $char === "\t" || $char === "\r" )
				continue;

			if ( $char === "\n" ) {
				$this->line++;
				continue;
			}

			// Text
			if ( $char === '"' || $char === "'" ) {
				$this->readText( $char );
				continue;
			}

			// Comments. A '/' which starts no comment is handled as an operator below.
			if ( $char === '/' && $this->skipComment() )
				continue;

			// Names and keywords
			if ( self::isNameStart( $char ) ) {
				$this->readName( $char );
				continue;
			}

			// Numbers
			// TODO we have a problem with
			// - "-" is an operator, so we cannot parse negative numbers
			// - "." is the property char, so we cannot parse decimal values
			if ( self::isDigit( $char ) ) {
				$this->readNumber( $char );
				continue;
			}

			// Operators
			if ( in_array( $char,self::OPERATOR_CHARS,true ) ) {
				$this->readOperator( $char );
				continue;
			}

			$this->readPunctuation( $char );
		}

		return $this->token;
	}


	/**
	 * Reads a string literal. The opening quote is already consumed.
	 *
	 * @param string $textEncloser the quote character which started the literal
	 * @throws DslParserException if the literal is not closed on its line
	 */
	private function readText( $textEncloser )
	{
		$value = '';

		while ( true ) {
			$char = $this->nextChar();

			// Running into the end of the input must fail too, otherwise this loop would never end.
			if ( $char === null || $char === "\n" )
				throw new DslParserException("Unclosed string",$this->line);

			if ( $char === '\\' ) {
				$escaped = $this->nextChar();

				if ( $escaped === null )
					throw new DslParserException("Unclosed string",$this->line);

				if ( $escaped === 'n' )
					$value .= "\n"; // append, do not drop the text read so far
				elseif ( $escaped === 't' )
					$value .= "\t";
				else {
					if ( $escaped === "\n" )
						$this->line++; // escaped line break, keep the line counter right
					$value .= $escaped;
				}
				continue;
			}

			if ( $char === $textEncloser )
				break;

			$value .= $char;
		}

		$this->addToken( $this->line,DslToken::T_TEXT,$value );
	}


	/**
	 * Skips a comment. The leading '/' is already consumed.
	 *
	 * A block comment which is not closed swallows the rest of the input.
	 *
	 * @return bool true if a comment was skipped, false if the '/' starts no comment
	 */
	private function skipComment()
	{
		$nextChar = $this->peekChar();

		if ( $nextChar === '/' ) { // Comment after "//"

			$this->pos++;
			while ( ( $c = $this->nextChar() ) !== null ) {
				if ( $c === "\n" ) {
					$this->line++;
					break;
				}
			}
			return true;
		}

		if ( $nextChar === '*' ) { // Comment after "/*"

			$this->pos++;
			$lastChar = null;
			while ( ( $c = $this->nextChar() ) !== null ) {
				if ( $c === "\n" )
					$this->line++;
				if ( $lastChar === '*' && $c === '/' )
					break;
				$lastChar = $c;
			}
			return true;
		}

		return false; // this is no comment
	}


	/**
	 * Reads a name and adds it as a keyword token or as T_STRING.
	 *
	 * The character which ends the name stays unread. This is also true for
	 * ignored keywords, so something like "new(" does not lose its bracket.
	 *
	 * @param string $firstChar first character of the name, already consumed
	 * @throws DslParserException if the name is a reserved word
	 */
	private function readName( $firstChar )
	{
		$value = $firstChar;

		while ( ( $char = $this->peekChar() ) !== null && self::isNamePart( $char ) ) {
			$value .= $char;
			$this->pos++;
		}

		if ( in_array( $value,self::UNUSED_KEYWORDS,true ) )
			throw new DslParserException( 'use of reserved word \''.$value.'\' is not allowed.',$this->line );

		if ( in_array( $value,self::IGNORED_KEYWORDS,true ) )
			return; // ignore this keyword

		$type = DslToken::T_STRING;
		if ( array_key_exists( $value,self::KEYWORDS ) )
			$type = self::KEYWORDS[$value]; // it is a keyword

		$this->addToken( $this->line,$type,$value );
	}


	/**
	 * Reads an integer literal. Underscores are allowed as separators and are
	 * removed from the token value.
	 *
	 * @param string $firstChar first digit, already consumed
	 */
	private function readNumber( $firstChar )
	{
		$value = $firstChar;

		while ( ( $char = $this->peekChar() ) !== null && ( self::isDigit( $char ) || $char === '_' ) ) {
			$value .= $char;
			$this->pos++;
		}

		$this->addToken( $this->line,DslToken::T_NUMBER,str_replace( '_','',$value ) );
	}


	/**
	 * Reads an operator. All directly following operator characters belong to
	 * the same token.
	 *
	 * @param string $firstChar first operator character, already consumed
	 */
	private function readOperator( $firstChar )
	{
		$value = $firstChar;

		while ( ( $char = $this->peekChar() ) !== null && in_array( $char,self::OPERATOR_CHARS,true ) ) {
			$value .= $char;
			$this->pos++;
		}

		$this->addToken( $this->line,DslToken::T_OPERATOR,$value );
	}


	/**
	 * Adds the token for a single punctuation character.
	 *
	 * @param string $char the character
	 * @throws DslParserException if the character is unknown
	 */
	private function readPunctuation( $char )
	{
		if ( $char === '!' )
			$this->addToken( $this->line,DslToken::T_NEGATION,$char );
		elseif ( $char === ';' )
			$this->addToken( $this->line,DslToken::T_STATEMENT_END,$char );
		elseif ( $char === '(' ) {
			$lastToken = $this->lastToken();
			// DslToken is declared elsewhere, so its type is compared loosely as before.
			if ( $lastToken !== null && $lastToken->type == DslToken::T_STRING )
				// if string is followed by "(" it is a function or a function call
				$this->addToken( $this->line,DslToken::T_OPERATOR,'$' ); // function call
			$this->addToken( $this->line,DslToken::T_BRACKET_OPEN,$char );
		}
		elseif ( $char === ')' ) {
			$lastToken = $this->lastToken();
			if ( $lastToken !== null && $lastToken->type == DslToken::T_BRACKET_OPEN )
				// if there is an empty parenthesis, make it contain something, otherwise the shunting yard algo will fail.
				$this->addToken( $this->line,DslToken::T_NONE );
			$this->addToken( $this->line,DslToken::T_BRACKET_CLOSE,$char );
		}
		elseif ( $char === '{' )
			$this->addToken( $this->line,DslToken::T_BLOCK_BEGIN,$char );
		elseif ( $char === '}' )
			$this->addToken( $this->line,DslToken::T_BLOCK_END,$char );
		else
			throw new DslParserException( 'Unknown character \''.$char.'\'',$this->line );
	}


	/**
	 * @return string|null the next character without consuming it, null at the end of the input
	 */
	private function peekChar()
	{
		return $this->pos < $this->length ? $this->code[ $this->pos ] : null;
	}


	/**
	 * @return string|null the next character, which is consumed, null at the end of the input
	 */
	private function nextChar()
	{
		if ( $this->pos >= $this->length )
			return null;

		return $this->code[ $this->pos++ ];
	}


	/**
	 * @return DslToken|null the last added token, null if there is none yet
	 */
	private function lastToken()
	{
		$count = count( $this->token );

		return $count > 0 ? $this->token[ $count-1 ] : null;
	}


	/**
	 * @param string $char a single character
	 * @return bool true if a name may start with this character
	 */
	private static function isNameStart( $char )
	{
		return ( $char >= 'a' && $char <= 'z' ) ||
		       ( $char >= 'A' && $char <= 'Z' ) ||
		       $char === '_' ||
		       $char === '$';
	}


	/**
	 * @param string $char a single character
	 * @return bool true if a name may contain this character
	 */
	private static function isNamePart( $char )
	{
		return self::isNameStart( $char ) || self::isDigit( $char );
	}


	/**
	 * @param string $char a single character
	 * @return bool true if the character is a digit from 0 to 9
	 */
	private static function isDigit( $char )
	{
		return $char >= '0' && $char <= '9';
	}


	/**
	 * Appends a new token to the token list.
	 *
	 * @param int $line line number of the token
	 * @param mixed $type one of the DslToken::T_* constants
	 * @param string|null $value text of the token
	 */
	private function addToken(int $line, $type, $value=null)
	{
		$this->token[] = new DslToken( $line, $type, $value );
	}


}
Download modules/dsl/DslLexer.class.php
History Tue, 19 Jul 2022 00:10:00 +0200 Jan Dankert New: Scripting language: Ignore Keyword "new"; Support for Calling object constructors; Splitting objects into an instance and a wrapper. Fri, 1 Jul 2022 18:09:05 +0200 Jan Dankert New: Bugfixes and much more string and array functions for the DSL. Sun, 26 Jun 2022 16:03:50 +0200 Jan Dankert New: Supporting line breaks in DSL. Sun, 26 Jun 2022 15:46:54 +0200 Jan Dankert Fix: Another, little better, hack for parameterless functions. Shunting yard seems to be unable to handle empty parentheses. Sun, 26 Jun 2022 12:51:07 +0200 Jan Dankert New: DSL can be controlled by flags; support for error messages; support for negative numbers. Tue, 7 Jun 2022 23:30:20 +0200 Jan Dankert New: DSL is now supporting throw statements. Tue, 7 Jun 2022 21:44:12 +0200 Jan Dankert Fix: DSL Lexer: Allow '<' and '>' in operators. Tue, 7 Jun 2022 21:43:48 +0200 Jan Dankert Change: DSL Lexer: Allow underscores in Numbers (like in real javascript) Sun, 29 May 2022 16:56:40 +0200 Jan Dankert New: DSL with support for functions with return values, full arithmetic, object properties Sat, 28 May 2022 18:00:13 +0200 Jan Dankert New: DSL with a much better syntax parsing and support for assignments, conditions, ... Wed, 25 May 2022 22:47:17 +0200 Jan Dankert New: DSL (domain specific language) for code elements. The old way with PHP code is not sandboxed and insecure. This approach is a minimalistic, javascript-like, scripting engine. For now only simple function calls are possible, for example: alert("example");