File geshi/geshi.php

Last commit: Sun Dec 9 23:32:58 2018 +0100	Jan Dankert	Fix: Geshi PHP7-fähig
<?php
/**
 * GeSHi - Generic Syntax Highlighter
 *
 * The GeSHi class for Generic Syntax Highlighting. Please refer to the
 * documentation at http://qbnz.com/highlighter/documentation.php for more
 * information about how to use this class.
 *
 * For changes, release notes, TODOs etc, see the relevant files in the docs/
 * directory.
 *
 *   This file is part of GeSHi.
 *
 *  GeSHi is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  GeSHi is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GeSHi; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * @package    geshi
 * @subpackage core
 * @author     Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
 * @copyright  (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
 * @license    http://gnu.org/copyleft/gpl.html GNU GPL
 *
 */

//
// GeSHi Constants
// You should use these constant names in your programs instead of
// their values - you never know when a value may change in a future
// version
//

/** The version of this GeSHi file */
define('GESHI_VERSION', '1.0.8.11');

// Define the root directory for the GeSHi code tree
// (an including script may define GESHI_ROOT beforehand to override it)
if (!defined('GESHI_ROOT')) {
    /** The root directory for GeSHi */
    define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
}
/** The language file directory for GeSHi
    @access private */
define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);

// Define if GeSHi should be paranoid about security
// (an including script may define GESHI_SECURITY_PARANOID beforehand)
if (!defined('GESHI_SECURITY_PARANOID')) {
    /** Tells GeSHi to be paranoid about security settings */
    define('GESHI_SECURITY_PARANOID', false);
}

// Line numbers - use with enable_line_numbers()
/** Use no line numbers when building the result */
define('GESHI_NO_LINE_NUMBERS', 0);
/** Use normal line numbers when building the result */
define('GESHI_NORMAL_LINE_NUMBERS', 1);
/** Use fancy line numbers when building the result */
define('GESHI_FANCY_LINE_NUMBERS', 2);

// Container HTML type
/** Use nothing to surround the source */
define('GESHI_HEADER_NONE', 0);
/** Use a "div" to surround the source */
define('GESHI_HEADER_DIV', 1);
/** Use a "pre" to surround the source */
define('GESHI_HEADER_PRE', 2);
/** Use a pre to wrap lines when line numbers are enabled or to wrap the whole code. */
define('GESHI_HEADER_PRE_VALID', 3);
/**
 * Use a "table" to surround the source:
 *
 *  <table>
 *    <thead><tr><td colspan="2">$header</td></tr></thead>
 *    <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
 *    <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
 *  </table>
 *
 * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
 * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
 * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
 */
define('GESHI_HEADER_PRE_TABLE', 4);

// Capitalisation constants
/** Leave keywords found as the case that they are */
define('GESHI_CAPS_NO_CHANGE', 0);
/** Uppercase keywords found */
define('GESHI_CAPS_UPPER', 1);
/** Lowercase keywords found */
define('GESHI_CAPS_LOWER', 2);

// Link style constants
/** Links in the source in the :link state */
define('GESHI_LINK', 0);
/** Links in the source in the :hover state */
define('GESHI_HOVER', 1);
/** Links in the source in the :active state */
define('GESHI_ACTIVE', 2);
/** Links in the source in the :visited state */
define('GESHI_VISITED', 3);

// Important string starter/finisher
// Note that if you change these, they should be as-is: i.e., don't
// write them as if they had been run through htmlentities()
/** The starter for important parts of the source */
define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
/** The ender for important parts of the source */
define('GESHI_END_IMPORTANT', '<END GeSHi>');

/**#@+
 *  @access private
 */
// When strict mode applies for a language
/** Strict mode never applies (this is the most common) */
define('GESHI_NEVER', 0);
/** Strict mode *might* apply, and can be enabled or
    disabled by {@link GeSHi->enable_strict_mode()} */
define('GESHI_MAYBE', 1);
/** Strict mode always applies */
define('GESHI_ALWAYS', 2);

// Advanced regexp handling constants, used in language files
/** The key of the regex array defining what to search for */
define('GESHI_SEARCH', 0);
/** The key of the regex array defining what bracket group in a
    matched search to use as a replacement */
define('GESHI_REPLACE', 1);
/** The key of the regex array defining any modifiers to the regular expression */
define('GESHI_MODIFIERS', 2);
/** The key of the regex array defining what bracket group in a
    matched search to put before the replacement */
define('GESHI_BEFORE', 3);
/** The key of the regex array defining what bracket group in a
    matched search to put after the replacement */
define('GESHI_AFTER', 4);
/** The key of the regex array defining a custom keyword to use
    for this regexp's html tag class */
define('GESHI_CLASS', 5);

/** Used in language files to mark comments */
define('GESHI_COMMENTS', 0);

/** Used to work around missing PHP features.
    True when the running PHP version is 4.3.3 or older. **/
define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION, '4.3.3') === 1));

/** make sure we can call stripos **/
if (!function_exists('stripos')) {
    /**
     * Fallback for PHP builds without a native stripos().
     *
     * Finds the first case-insensitive occurrence of $needle in $haystack,
     * starting the search at $offset.
     *
     * @param string   $haystack The string to search in
     * @param string   $needle   The text to look for (taken literally)
     * @param int|null $offset   Position to start searching from; null means 0
     * @return int|false Position of the match counted from the start of
     *                   $haystack, or false when there is no match
     * @ignore
     */
    function stripos($haystack, $needle, $offset = null) {
        $offset = is_null($offset) ? 0 : (int) $offset;

        // Cutting the haystack works on every PHP version, so no separate
        // code path is needed for builds whose preg_match() lacks the
        // offset parameter.
        if ($offset > 0) {
            $haystack = substr($haystack, $offset);
        }

        // The "i" modifier is what makes this a stripos() and not a strpos().
        if (preg_match('/' . preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE)) {
            // The match position is relative to the cut haystack: shift it
            // back so callers get an index into the original string.
            return $match[0][1] + $offset;
        }
        return false;
    }
}

/** some old PHP / PCRE subpatterns only support up to xxx subpatterns in
    regular expressions. Set this to false if your PCRE lib is up to date
    @see GeSHi->optimize_regexp_list()
    **/
define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
/** it's also important not to generate too long regular expressions
    be generous here... but keep in mind, that when reaching this limit we
    still have to close open patterns. 12k should do just fine on a 16k limit.
    @see GeSHi->optimize_regexp_list()
    **/
define('GESHI_MAX_PCRE_LENGTH', 12288);

//Number format specification
/** Basic number format for integers */
define('GESHI_NUMBER_INT_BASIC', 1);        //Default integers \d+
/** Enhanced number format for integers like seen in C */
define('GESHI_NUMBER_INT_CSTYLE', 2);       //Default C-Style \d+[lL]?
/** Number format to highlight binary numbers with a suffix "b" */
define('GESHI_NUMBER_BIN_SUFFIX', 16);           //[01]+[bB]
/** Number format to highlight binary numbers with a prefix % */
define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32);   //%[01]+
/** Number format to highlight binary numbers with a prefix 0b (C) */
define('GESHI_NUMBER_BIN_PREFIX_0B', 64);        //0b[01]+
/** Number format to highlight octal numbers with a leading zero */
define('GESHI_NUMBER_OCT_PREFIX', 256);          //0[0-7]+
/** Number format to highlight octal numbers with a prefix 0o (logtalk) */
define('GESHI_NUMBER_OCT_PREFIX_0O', 512);       //0o[0-7]+
/** Number format to highlight octal numbers with a leading @ (Used in HiSofts Devpac series). */
define('GESHI_NUMBER_OCT_PREFIX_AT', 1024);      //@[0-7]+
/** Number format to highlight octal numbers with a suffix of o */
define('GESHI_NUMBER_OCT_SUFFIX', 2048);         //[0-7]+[oO]
/** Number format to highlight hex numbers with a prefix 0x */
define('GESHI_NUMBER_HEX_PREFIX', 4096);         //0x[0-9a-fA-F]+
/** Number format to highlight hex numbers with a prefix $ */
define('GESHI_NUMBER_HEX_PREFIX_DOLLAR', 8192);  //$[0-9a-fA-F]+
/** Number format to highlight hex numbers with a suffix of h */
define('GESHI_NUMBER_HEX_SUFFIX', 16384);        //[0-9][0-9a-fA-F]*h
/** Number format to highlight floating-point numbers without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI', 65536);        //\d+\.\d+
/** Number format to highlight floating-point numbers with a trailing "f", without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI_F', 131072);     //\d+(\.\d+)?f
/** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
define('GESHI_NUMBER_FLT_SCI_SHORT', 262144);    //\.\d+e\d+
/** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
define('GESHI_NUMBER_FLT_SCI_ZERO', 524288);     //\d+(\.\d+)?e\d+
//Custom formats are passed by RX array

// Error detection - use these to analyse faults
/** No sourcecode to highlight was specified
 * @deprecated
 */
define('GESHI_ERROR_NO_INPUT', 1);
/** The language specified does not exist */
define('GESHI_ERROR_NO_SUCH_LANG', 2);
/** GeSHi could not open a file for reading (generally a language file) */
define('GESHI_ERROR_FILE_NOT_READABLE', 3);
/** The header type passed to {@link GeSHi->set_header_type()} was invalid */
define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
/** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
/**#@-*/


/**
 * The GeSHi Class.
 *
 * Please refer to the documentation for GeSHi 1.0.X that is available
 * at http://qbnz.com/highlighter/documentation.php for more information
 * about how to use this class.
254 * 255 * @package geshi 256 * @author Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de> 257 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann 258 */ 259 class GeSHi { 260 /**#@+ 261 * @access private 262 */ 263 /** 264 * The source code to highlight 265 * @var string 266 */ 267 var $source = ''; 268 269 /** 270 * The language to use when highlighting 271 * @var string 272 */ 273 var $language = ''; 274 275 /** 276 * The data for the language used 277 * @var array 278 */ 279 var $language_data = array(); 280 281 /** 282 * The path to the language files 283 * @var string 284 */ 285 var $language_path = GESHI_LANG_ROOT; 286 287 /** 288 * The error message associated with an error 289 * @var string 290 * @todo check err reporting works 291 */ 292 var $error = false; 293 294 /** 295 * Possible error messages 296 * @var array 297 */ 298 var $error_messages = array( 299 GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})', 300 GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable', 301 GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid', 302 GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid' 303 ); 304 305 /** 306 * Whether highlighting is strict or not 307 * @var boolean 308 */ 309 var $strict_mode = false; 310 311 /** 312 * Whether to use CSS classes in output 313 * @var boolean 314 */ 315 var $use_classes = false; 316 317 /** 318 * The type of header to use. Can be one of the following 319 * values: 320 * 321 * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element. 322 * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element. 323 * - GESHI_HEADER_NONE: No header is outputted. 
324 * 325 * @var int 326 */ 327 var $header_type = GESHI_HEADER_PRE; 328 329 /** 330 * Array of permissions for which lexics should be highlighted 331 * @var array 332 */ 333 var $lexic_permissions = array( 334 'KEYWORDS' => array(), 335 'COMMENTS' => array('MULTI' => true), 336 'REGEXPS' => array(), 337 'ESCAPE_CHAR' => true, 338 'BRACKETS' => true, 339 'SYMBOLS' => false, 340 'STRINGS' => true, 341 'NUMBERS' => true, 342 'METHODS' => true, 343 'SCRIPT' => true 344 ); 345 346 /** 347 * The time it took to parse the code 348 * @var double 349 */ 350 var $time = 0; 351 352 /** 353 * The content of the header block 354 * @var string 355 */ 356 var $header_content = ''; 357 358 /** 359 * The content of the footer block 360 * @var string 361 */ 362 var $footer_content = ''; 363 364 /** 365 * The style of the header block 366 * @var string 367 */ 368 var $header_content_style = ''; 369 370 /** 371 * The style of the footer block 372 * @var string 373 */ 374 var $footer_content_style = ''; 375 376 /** 377 * Tells if a block around the highlighted source should be forced 378 * if not using line numbering 379 * @var boolean 380 */ 381 var $force_code_block = false; 382 383 /** 384 * The styles for hyperlinks in the code 385 * @var array 386 */ 387 var $link_styles = array(); 388 389 /** 390 * Whether important blocks should be recognised or not 391 * @var boolean 392 * @deprecated 393 * @todo REMOVE THIS FUNCTIONALITY! 
394 */ 395 var $enable_important_blocks = false; 396 397 /** 398 * Styles for important parts of the code 399 * @var string 400 * @deprecated 401 * @todo As above - rethink the whole idea of important blocks as it is buggy and 402 * will be hard to implement in 1.2 403 */ 404 var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code 405 406 /** 407 * Whether CSS IDs should be added to the code 408 * @var boolean 409 */ 410 var $add_ids = false; 411 412 /** 413 * Lines that should be highlighted extra 414 * @var array 415 */ 416 var $highlight_extra_lines = array(); 417 418 /** 419 * Styles of lines that should be highlighted extra 420 * @var array 421 */ 422 var $highlight_extra_lines_styles = array(); 423 424 /** 425 * Styles of extra-highlighted lines 426 * @var string 427 */ 428 var $highlight_extra_lines_style = 'background-color: #ffc;'; 429 430 /** 431 * The line ending 432 * If null, nl2br() will be used on the result string. 433 * Otherwise, all instances of \n will be replaced with $line_ending 434 * @var string 435 */ 436 var $line_ending = null; 437 438 /** 439 * Number at which line numbers should start at 440 * @var int 441 */ 442 var $line_numbers_start = 1; 443 444 /** 445 * The overall style for this code block 446 * @var string 447 */ 448 var $overall_style = 'font-family:monospace;'; 449 450 /** 451 * The style for the actual code 452 * @var string 453 */ 454 var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;'; 455 456 /** 457 * The overall class for this code block 458 * @var string 459 */ 460 var $overall_class = ''; 461 462 /** 463 * The overall ID for this code block 464 * @var string 465 */ 466 var $overall_id = ''; 467 468 /** 469 * Line number styles 470 * @var string 471 */ 472 var $line_style1 = 'font-weight: normal; vertical-align:top;'; 473 474 /** 475 * Line number styles for fancy lines 476 * @var string 477 */ 478 var 
$line_style2 = 'font-weight: bold; vertical-align:top;'; 479 480 /** 481 * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen 482 * @var string 483 */ 484 var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;'; 485 486 /** 487 * Flag for how line numbers are displayed 488 * @var boolean 489 */ 490 var $line_numbers = GESHI_NO_LINE_NUMBERS; 491 492 /** 493 * Flag to decide if multi line spans are allowed. Set it to false to make sure 494 * each tag is closed before and reopened after each linefeed. 495 * @var boolean 496 */ 497 var $allow_multiline_span = true; 498 499 /** 500 * The "nth" value for fancy line highlighting 501 * @var int 502 */ 503 var $line_nth_row = 0; 504 505 /** 506 * The size of tab stops 507 * @var int 508 */ 509 var $tab_width = 8; 510 511 /** 512 * Should we use language-defined tab stop widths? 513 * @var int 514 */ 515 var $use_language_tab_width = false; 516 517 /** 518 * Default target for keyword links 519 * @var string 520 */ 521 var $link_target = ''; 522 523 /** 524 * The encoding to use for entity encoding 525 * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598) 526 * @var string 527 */ 528 var $encoding = 'utf-8'; 529 530 /** 531 * Should keywords be linked? 532 * @var boolean 533 */ 534 var $keyword_links = true; 535 536 /** 537 * Currently loaded language file 538 * @var string 539 * @since 1.0.7.22 540 */ 541 var $loaded_language = ''; 542 543 /** 544 * Wether the caches needed for parsing are built or not 545 * 546 * @var bool 547 * @since 1.0.8 548 */ 549 var $parse_cache_built = false; 550 551 /** 552 * Work around for Suhosin Patch with disabled /e modifier 553 * 554 * Note from suhosins author in config file: 555 * <blockquote> 556 * The /e modifier inside <code>preg_replace()</code> allows code execution. 557 * Often it is the cause for remote code execution exploits. 
It is wise to
     * deactivate this feature and test where in the application it is used.
     * The developer using the /e modifier should be made aware that he should
     * use <code>preg_replace_callback()</code> instead
     * </blockquote>
     *
     * @var int
     * @since 1.0.8
     */
    var $_kw_replace_group = 0;
    var $_rx_key = 0;

    /**
     * some "callback parameters" for handle_multiline_regexps
     *
     * @since 1.0.8
     * @access private
     * @var string
     */
    var $_hmr_before = '';
    var $_hmr_replace = '';
    var $_hmr_after = '';
    var $_hmr_key = 0;

    /**#@-*/

    /**
     * Creates a new GeSHi object, with source and language
     *
     * @param string The source code to highlight
     * @param string The language to highlight the source with
     * @param string The path to the language file directory. <b>This
     *               is deprecated!</b> I've backported the auto path
     *               detection from the 1.1.X dev branch, so now it
     *               should be automatically set correctly. If you have
     *               renamed the language directory however, you will
     *               still need to set the path using this parameter or
     *               {@link GeSHi->set_language_path()}
     * @since 1.0.0
     */
    function __construct($source = '', $language = '', $path = '') {
        // empty() alone would also discard the source "0", which is falsy
        // in PHP but still a valid piece of text to highlight.
        if (!empty($source) || '0' === $source) {
            $this->set_source($source);
        }
        if (!empty($language)) {
            $this->set_language($language);
        }
        // An empty path is ignored by set_language_path(); a valid one
        // triggers a reload of the language from the new directory.
        $this->set_language_path($path);
    }

    /**
     * Returns the version of GeSHi
     *
     * @return string
     * @since 1.0.8.11
     */
    function get_version()
    {
        return GESHI_VERSION;
    }

    /**
     * Returns an error message associated with the last GeSHi operation,
     * or false if no error has occured
     *
     * $this->error normally holds one of the GESHI_ERROR_* codes, but
     * get_language_fullname() stores a ready-made message string in it.
     * Both forms are handled here.
     *
     * @return string|false An error message if there has been an error, else false
     * @since 1.0.0
     */
    function error() {
        if (!$this->error) {
            return false;
        }

        // Free-form message: there is no template and no numeric code to show.
        if (is_string($this->error) && !isset($this->error_messages[$this->error])) {
            return "<br /><strong>GeSHi Error:</strong> {$this->error}<br />";
        }

        // Codes without a template (e.g. GESHI_ERROR_NO_INPUT) yield an
        // empty message text; the code itself is still reported.
        $template = isset($this->error_messages[$this->error])
            ? $this->error_messages[$this->error]
            : '';

        //Put some template variables for debugging here ...
        $debug_tpl_vars = array(
            '{LANGUAGE}' => $this->language,
            '{PATH}' => $this->language_path
        );
        $msg = str_replace(
            array_keys($debug_tpl_vars),
            array_values($debug_tpl_vars),
            $template);

        return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
    }

    /**
     * Gets a human-readable language name (thanks to Simon Patterson
     * for the idea :))
     *
     * @return string The name for the current language; suffixed with
     *                ' (Unknown Language)' when the last set_language()
     *                call could not find the language file
     * @since 1.0.2
     */
    function get_language_name() {
        // No language data is present when no language file was loaded yet,
        // so avoid reading an undefined array key.
        $name = isset($this->language_data['LANG_NAME'])
            ? $this->language_data['LANG_NAME']
            : '';

        if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
            return $name . ' (Unknown Language)';
        }
        return $name;
    }

    /**
     * Sets the source code for this object
     *
     * The list of extra-highlighted lines is cleared as well.
     *
     * @param string The source code to highlight
     * @since 1.0.0
     */
    function set_source($source) {
        $this->source = $source;
        $this->highlight_extra_lines = array();
    }

    /**
     * Sets the language for this object
     *
     * @note since 1.0.8 this function won't reset language-settings by default anymore!
     *       if you need this set $force_reset = true
     *
     * @param string  The name of the language to use
     * @param boolean Whether to reload the language file even if it is the
     *                one currently loaded
     * @since 1.0.0
     */
    function set_language($language, $force_reset = false) {
        if ($force_reset) {
            // forget the loaded file so the comparison below cannot match
            $this->loaded_language = false;
        }

        //Clean up the language name to prevent malicious code injection
        $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);

        $language = strtolower($language);

        //Retreive the full filename
        $file_name = $this->language_path . $language . '.php';
        if ($file_name == $this->loaded_language) {
            // this language is already loaded!
            return;
        }

        $this->language = $language;

        $this->error = false;
        $this->strict_mode = GESHI_NEVER;

        //Check if we can read the desired file
        if (!is_readable($file_name)) {
            $this->error = GESHI_ERROR_NO_SUCH_LANG;
            return;
        }

        // Load the language for parsing
        $this->load_language($file_name);
    }

    /**
     * Sets the path to the directory containing the language files. Note
     * that this path is relative to the directory of the script that included
     * geshi.php, NOT geshi.php itself.
     *
     * Paths that fail one of the security checks are silently ignored and
     * leave the current language path unchanged.
     *
     * @param string The path to the language directory
     * @since 1.0.0
     * @deprecated The path to the language files should now be automatically
     *             detected, so this method should no longer be needed. The
     *             1.1.X branch handles manual setting of the path differently
     *             so this method will disappear in 1.2.0.
     */
    function set_language_path($path) {
        // Compare against false: a colon at index 0 makes strpos() return 0,
        // which a plain truthiness test would mistake for "no colon".
        if (false !== strpos($path, ':')) {
            //Security Fix to prevent external directories using fopen wrappers.
            if (DIRECTORY_SEPARATOR != "\\") {
                return;
            }
            // On Windows exactly one colon is allowed: the drive prefix.
            if (!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
                return;
            }
        }
        if (preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
            //Security Fix to prevent external directories using fopen wrappers.
            return;
        }
        if (GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
            //Security Fix to prevent external directories using fopen wrappers.
            return;
        }
        if (GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
            //Security Fix to prevent external directories using fopen wrappers.
            return;
        }
        if ($path) {
            // make sure the stored path ends with a slash
            $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
            $this->set_language($this->language); // otherwise set_language_path has no effect
        }
    }

    /**
     * Get supported langs or an associative array lang=>full_name.
* Only regular files named "<lang>.php" directly inside the language
     * path are considered; an unreadable or missing language path yields
     * an empty array.
     *
     * @param boolean $full_names If true, return an associative array
     *                            lang=>full_name instead of a plain list
     * @return array
     */
    function get_supported_languages($full_names=false)
    {
        // return array
        $back = array();

        // dir() gives no Directory object for a missing or unreadable path;
        // report "no languages" instead of failing on ->read() below
        if (!is_dir($this->language_path)) {
            return $back;
        }
        $dir = dir($this->language_path);
        if (!$dir) {
            return $back;
        }

        // we walk the lang root, entry by entry
        while (false !== ($entry = $dir->read()))
        {
            $full_path = $this->language_path.$entry;

            // Skip all dirs
            if (is_dir($full_path)) {
                continue;
            }

            // we only want lang.php files
            if (!preg_match('/^([^.]+)\.php$/', $entry, $matches)) {
                continue;
            }

            // Raw lang name is here
            $langname = $matches[1];

            // We want the fullname too?
            if ($full_names === true)
            {
                // files whose full name cannot be determined are left out
                if (false !== ($fullname = $this->get_language_fullname($langname)))
                {
                    $back[$langname] = $fullname; // we go associative
                }
            }
            else
            {
                // just store raw langname
                $back[] = $langname;
            }
        }

        $dir->close();

        return $back;
    }

    /**
     * Get full_name for a lang or false.
* The name is read from the 'LANG_NAME' entry of the language file's
     * source text; the file itself is not executed. On failure a message
     * is stored in $this->error.
     *
     * @param string $language short langname (html4strict for example)
     * @return string|false The full language name, or false on failure
     */
    function get_language_fullname($language)
    {
        //Clean up the language name to prevent malicious code injection
        $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);

        $language = strtolower($language);

        // get fullpath-filename for a langname
        $fullpath = $this->language_path.$language.'.php';

        // we need to get contents :S
        // (readability is probed first so that an unknown language is
        // reported through $this->error only, without a PHP warning)
        if (!is_readable($fullpath) || false === ($data = file_get_contents($fullpath))) {
            $this->error = sprintf('Geshi::get_lang_fullname() Unknown Language: %s', $language);
            return false;
        }

        // match the langname
        if (!preg_match('/\'LANG_NAME\'\s*=>\s*\'((?:[^\']|\\\')+?)\'/', $data, $matches)) {
            $this->error = sprintf('Geshi::get_lang_fullname(%s): Regex can not detect language', $language);
            return false;
        }

        // return fullname for langname
        return stripcslashes($matches[1]);
    }

    /**
     * Sets the type of header to be used.
     *
     * If GESHI_HEADER_DIV is used, the code is surrounded in a "div".This
     * means more source code but more control over tab width and line-wrapping.
     * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
     * control. Default is GESHI_HEADER_PRE.
     *
     * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
     * should be outputted.
     *
     * An invalid type leaves the current header type untouched and sets the
     * error to GESHI_ERROR_INVALID_HEADER_TYPE.
     *
     * @param int The type of header to be used
     * @since 1.0.0
     */
    function set_header_type($type) {
        $valid_types = array(GESHI_HEADER_NONE, GESHI_HEADER_DIV,
            GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID, GESHI_HEADER_PRE_TABLE);

        //Check if we got a valid header type
        // The numeric test is needed because the loose in_array() comparison
        // treats null, false and (on PHP 7) any non-numeric string as equal
        // to 0, i.e. as GESHI_HEADER_NONE.
        if (!is_numeric($type) || !in_array($type, $valid_types)) {
            $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
            return;
        }

        //Set that new header type
        $this->header_type = $type;
    }

    /**
     * Sets the styles for the code that will be outputted
     * when this object is parsed.
The style should be a
     * string of valid stylesheet declarations
     *
     * @param string  The overall style for the outputted code block
     * @param boolean If true the declarations are appended to the current
     *                overall style, otherwise they replace it
     * @since 1.0.0
     */
    function set_overall_style($style, $preserve_defaults = false) {
        if ($preserve_defaults) {
            // keep what is there and add the new declarations behind it
            $this->overall_style .= $style;
            return;
        }
        $this->overall_style = $style;
    }

    /**
     * Stores the class name for the whole block of code, so that the
     * output of this object can be addressed from a stylesheet
     *
     * @param string The class name to use for this block of code
     * @since 1.0.0
     */
    function set_overall_class($class) {
        $this->overall_class = $class;
    }

    /**
     * Stores the id for the whole block of code, so that the output of
     * this object can be addressed from a stylesheet
     *
     * @param string The ID to use for this block of code
     * @since 1.0.0
     */
    function set_overall_id($id) {
        $this->overall_id = $id;
    }

    /**
     * Switches the use of CSS classes for highlighting on or off. Classes
     * are off by default; calling this method without arguments turns
     * them on
     *
     * @param boolean Whether to turn classes on or not
     * @since 1.0.0
     */
    function enable_classes($flag = true) {
        $this->use_classes = (bool) $flag;
    }

    /**
     * Sets the style for the actual code. This should be a string
     * containing valid stylesheet declarations. If $preserve_defaults is
     * true, then styles are merged with the default styles, with the
     * user defined styles having priority
     *
     * Note: Use this method to override any style changes you made to
     * the line numbers if you are using line numbers, else the line of
     * code will have the same style as the line number! Consult the
     * GeSHi documentation for more information about this.
*
     * @param string  The style to use for actual code
     * @param boolean If true the declarations are appended to the current
     *                code style, otherwise they replace it
     * @since 1.0.2
     */
    function set_code_style($style, $preserve_defaults = false) {
        if ($preserve_defaults) {
            $this->code_style .= $style;
            return;
        }
        $this->code_style = $style;
    }

    /**
     * Sets the styles for the line numbers.
     *
     * @param string The style for the line numbers that are "normal"
     * @param string|boolean If a string, this is the style of the line
     *        numbers that are "fancy", otherwise if boolean then this
     *        defines whether the normal styles should be merged with the
     *        new normal styles or not
     * @param boolean If set, is the flag for whether to merge the "fancy"
     *        styles with the current styles or not
     * @since 1.0.2
     */
    function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
        // Two-argument form set_line_style($style, $merge): the second
        // argument is the merge flag and there is no fancy style.
        if (is_bool($style2)) {
            $preserve_defaults = $style2;
            $style2 = '';
        }

        if ($preserve_defaults) {
            // append to both the normal and the fancy style
            $this->line_style1 .= $style1;
            $this->line_style2 .= $style2;
            return;
        }

        // replace both styles
        $this->line_style1 = $style1;
        $this->line_style2 = $style2;
    }

    /**
     * Sets whether line numbers should be displayed.
     *
     * Valid values for the first parameter are:
     *
     *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
     *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
     *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
     *
     * For fancy line numbers, the second parameter is used to signal which lines
     * are to be fancy. For example, if the value of this parameter is 5 then every
     * 5th line will be fancy.
*
     * An invalid first parameter sets the error to
     * GESHI_ERROR_INVALID_LINE_NUMBER_TYPE and leaves the current line
     * number settings untouched.
     *
     * @param int How line numbers should be displayed
     * @param int Defines which lines are fancy
     * @since 1.0.0
     */
    function enable_line_numbers($flag, $nth_row = 5) {
        if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
            && GESHI_FANCY_LINE_NUMBERS != $flag) {
            $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
            // Do not store a value that was just reported as invalid
            // (same behaviour as set_header_type()).
            return;
        }
        $this->line_numbers = $flag;
        $this->line_nth_row = $nth_row;
    }

    /**
     * Sets wether spans and other HTML markup generated by GeSHi can
     * span over multiple lines or not. Defaults to true to reduce overhead.
     * Set it to false if you want to manipulate the output or manually display
     * the code in an ordered list.
     *
     * @param boolean Wether multiline spans are allowed or not
     * @since 1.0.7.22
     */
    function enable_multiline_span($flag) {
        $this->allow_multiline_span = (bool) $flag;
    }

    /**
     * Get current setting for multiline spans, see GeSHi->enable_multiline_span().
     *
     * @see enable_multiline_span
     * @return bool
     */
    function get_multiline_span() {
        return $this->allow_multiline_span;
    }

    /**
     * Sets the style for a keyword group.
If $preserve_defaults is
     * true, then styles are merged with the default styles, with the
     * user defined styles having priority
     *
     * Highlighting for the group is switched on if no permission has been
     * recorded for it yet.
     *
     * @param int     The key of the keyword group to change the styles of
     * @param string  The style to make the keywords
     * @param boolean Whether to merge the new styles with the old or just
     *                to overwrite them
     * @since 1.0.0
     */
    function set_keyword_group_style($key, $style, $preserve_defaults = false) {
        //Set the style for this keyword group
        // Merging is only possible when a style exists for the group;
        // otherwise the new style is simply stored (avoids appending to an
        // undefined array key).
        if ($preserve_defaults && isset($this->language_data['STYLES']['KEYWORDS'][$key])) {
            $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
        } else {
            $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
        }

        //Update the lexic permissions
        if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
            $this->lexic_permissions['KEYWORDS'][$key] = true;
        }
    }

    /**
     * Turns highlighting on/off for a keyword group
     *
     * @param int     The key of the keyword group to turn on or off
     * @param boolean Whether to turn highlighting for that group on or off
     * @since 1.0.0
     */
    function set_keyword_group_highlighting($key, $flag = true) {
        $this->lexic_permissions['KEYWORDS'][$key] = ($flag) ? true : false;
    }

    /**
     * Sets the styles for comment groups.
If $preserve_defaults is
     * true, the given style is appended to the style already stored for the
     * group (so the user defined part comes last in the style string);
     * otherwise the stored style is replaced
     *
     * @param int The key of the comment group to change the styles of
     * @param string The style to make the comments
     * @param boolean Whether to merge the new styles with the old or just
     *  to overwrite them
     * @since 1.0.0
     */
    function set_comments_style($key, $style, $preserve_defaults = false) {
        //Only append when there is a stored style to append to; otherwise
        //the ".=" would read an undefined array key
        if ($preserve_defaults && isset($this->language_data['STYLES']['COMMENTS'][$key])) {
            $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
        } else {
            $this->language_data['STYLES']['COMMENTS'][$key] = $style;
        }
    }

    /**
     * Turns highlighting on/off for comment groups
     *
     * @param int The key of the comment group to turn on or off
     * @param boolean Whether to turn highlighting for that group on or off
     * @since 1.0.0
     */
    function set_comments_highlighting($key, $flag = true) {
        $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
    }

    /**
     * Sets the styles for escaped characters.
If $preserve_defaults is
     * true, the given style is appended to the style already stored for the
     * group (so the user defined part comes last in the style string);
     * otherwise the stored style is replaced
     *
     * @param string The style to make the escape characters
     * @param boolean Whether to merge the new styles with the old or just
     *  to overwrite them
     * @param int The escape character group for which the style is set
     * @since 1.0.0
     */
    function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
        //Only append when there is a stored style to append to; otherwise
        //the ".=" would read an undefined array key
        if ($preserve_defaults && isset($this->language_data['STYLES']['ESCAPE_CHAR'][$group])) {
            $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
        } else {
            $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
        }
    }

    /**
     * Turns highlighting on/off for escaped characters
     *
     * @param boolean Whether to turn highlighting for escape characters on or off
     * @since 1.0.0
     */
    function set_escape_characters_highlighting($flag = true) {
        $this->lexic_permissions['ESCAPE_CHAR'] = (bool) $flag;
    }

    /**
     * Sets the styles for brackets. If $preserve_defaults is
     * true, the given style is appended to the stored bracket style;
     * otherwise the stored style is replaced
     *
     * This method is DEPRECATED: use set_symbols_style instead.
     * This method will be removed in 1.2.X
     *
     * @param string The style to make the brackets
     * @param boolean Whether to merge the new styles with the old or just
     *  to overwrite them
     * @since 1.0.0
     * @deprecated In favour of set_symbols_style
     */
    function set_brackets_style($style, $preserve_defaults = false) {
        //Only append when there is a stored style to append to; otherwise
        //the ".=" would read an undefined array key
        if ($preserve_defaults && isset($this->language_data['STYLES']['BRACKETS'][0])) {
            $this->language_data['STYLES']['BRACKETS'][0] .= $style;
        } else {
            $this->language_data['STYLES']['BRACKETS'][0] = $style;
        }
    }

    /**
     * Turns highlighting on/off for brackets
     *
     * This method is DEPRECATED: use set_symbols_highlighting instead.
* This method will be removed in 1.2.X
     *
     * @param boolean Whether to turn highlighting for brackets on or off
     * @since 1.0.0
     * @deprecated In favour of set_symbols_highlighting
     */
    function set_brackets_highlighting($flag) {
        $this->lexic_permissions['BRACKETS'] = (bool) $flag;
    }

    /**
     * Sets the styles for symbols. If $preserve_defaults is
     * true, the given style is appended to the style already stored for the
     * group (so the user defined part comes last in the style string);
     * otherwise the stored style is replaced
     *
     * @param string The style to make the symbols
     * @param boolean Whether to merge the new styles with the old or just
     *  to overwrite them
     * @param int Tells the group of symbols for which style should be set.
     * @since 1.0.1
     */
    function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
        // Update the style of symbols. Only append when there is a stored
        // style to append to; otherwise the ".=" would read an undefined
        // array key
        if ($preserve_defaults && isset($this->language_data['STYLES']['SYMBOLS'][$group])) {
            $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
        } else {
            $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
        }

        // For backward compatibility: symbol group 0 is mirrored to the
        // deprecated bracket style
        if (0 == $group) {
            $this->set_brackets_style($style, $preserve_defaults);
        }
    }

    /**
     * Turns highlighting on/off for symbols
     *
     * @param boolean Whether to turn highlighting for symbols on or off
     * @since 1.0.0
     */
    function set_symbols_highlighting($flag) {
        // Update lexic permissions for symbols
        $this->lexic_permissions['SYMBOLS'] = (bool) $flag;

        // For backward compatibility: keep the bracket permission in step
        $this->set_brackets_highlighting($flag);
    }

    /**
     * Sets the styles for strings.
If $preserve_defaults is
     * true, the given style is appended to the style already stored for the
     * group (so the user defined part comes last in the style string);
     * otherwise the stored style is replaced
     *
     * @param string The style to make the strings
     * @param boolean Whether to merge the new styles with the old or just
     *  to overwrite them
     * @param int Tells the group of strings for which style should be set.
     * @since 1.0.0
     */
    function set_strings_style($style, $preserve_defaults = false, $group = 0) {
        //Only append when there is a stored style to append to; otherwise
        //the ".=" would read an undefined array key
        if ($preserve_defaults && isset($this->language_data['STYLES']['STRINGS'][$group])) {
            $this->language_data['STYLES']['STRINGS'][$group] .= $style;
        } else {
            $this->language_data['STYLES']['STRINGS'][$group] = $style;
        }
    }

    /**
     * Turns highlighting on/off for strings
     *
     * @param boolean Whether to turn highlighting for strings on or off
     * @since 1.0.0
     */
    function set_strings_highlighting($flag) {
        $this->lexic_permissions['STRINGS'] = (bool) $flag;
    }

    /**
     * Sets the styles for strict code blocks. If $preserve_defaults is
     * true, the given style is appended to the style already stored for the
     * group (so the user defined part comes last in the style string);
     * otherwise the stored style is replaced
     *
     * @param string The style to make the script blocks
     * @param boolean Whether to merge the new styles with the old or just
     *  to overwrite them
     * @param int Tells the group of script blocks for which style should be set.
     * @since 1.0.8.4
     */
    function set_script_style($style, $preserve_defaults = false, $group = 0) {
        // Update the style of script blocks. Only append when there is a
        // stored style to append to; otherwise the ".=" would read an
        // undefined array key
        if ($preserve_defaults && isset($this->language_data['STYLES']['SCRIPT'][$group])) {
            $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
        } else {
            $this->language_data['STYLES']['SCRIPT'][$group] = $style;
        }
    }

    /**
     * Sets the styles for numbers.
If $preserve_defaults is
     * true, the given style is appended to the style already stored for the
     * group (so the user defined part comes last in the style string);
     * otherwise the stored style is replaced
     *
     * @param string The style to make the numbers
     * @param boolean Whether to merge the new styles with the old or just
     *  to overwrite them
     * @param int Tells the group of numbers for which style should be set.
     * @since 1.0.0
     */
    function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
        //Only append when there is a stored style to append to; otherwise
        //the ".=" would read an undefined array key
        if ($preserve_defaults && isset($this->language_data['STYLES']['NUMBERS'][$group])) {
            $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
        } else {
            $this->language_data['STYLES']['NUMBERS'][$group] = $style;
        }
    }

    /**
     * Turns highlighting on/off for numbers
     *
     * @param boolean Whether to turn highlighting for numbers on or off
     * @since 1.0.0
     */
    function set_numbers_highlighting($flag) {
        $this->lexic_permissions['NUMBERS'] = (bool) $flag;
    }

    /**
     * Sets the styles for methods. $key is a number that references the
     * appropriate "object splitter" - see the language file for the language
     * you are highlighting to get this number.
If $preserve_defaults is
     * true, the given style is appended to the style already stored for the
     * splitter (so the user defined part comes last in the style string);
     * otherwise the stored style is replaced
     *
     * @param int The key of the object splitter to change the styles of
     * @param string The style to make the methods
     * @param boolean Whether to merge the new styles with the old or just
     *  to overwrite them
     * @since 1.0.0
     */
    function set_methods_style($key, $style, $preserve_defaults = false) {
        //Only append when there is a stored style to append to; otherwise
        //the ".=" would read an undefined array key
        if ($preserve_defaults && isset($this->language_data['STYLES']['METHODS'][$key])) {
            $this->language_data['STYLES']['METHODS'][$key] .= $style;
        } else {
            $this->language_data['STYLES']['METHODS'][$key] = $style;
        }
    }

    /**
     * Turns highlighting on/off for methods
     *
     * @param boolean Whether to turn highlighting for methods on or off
     * @since 1.0.0
     */
    function set_methods_highlighting($flag) {
        $this->lexic_permissions['METHODS'] = (bool) $flag;
    }

    /**
     * Sets the styles for regexps. If $preserve_defaults is
     * true, the given style is appended to the style already stored under
     * that key; otherwise the stored style is replaced
     *
     * @param int The key of the regular expression style to change
     * @param string The style to make the regular expression matches
     * @param boolean Whether to merge the new styles with the old or just
     *  to overwrite them
     * @since 1.0.0
     */
    function set_regexps_style($key, $style, $preserve_defaults = false) {
        //Only append when there is a stored style to append to; otherwise
        //the ".=" would read an undefined array key
        if ($preserve_defaults && isset($this->language_data['STYLES']['REGEXPS'][$key])) {
            $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
        } else {
            $this->language_data['STYLES']['REGEXPS'][$key] = $style;
        }
    }

    /**
     * Turns highlighting on/off for regexps
     *
     * @param int The key of the regular expression group to turn on or off
     * @param boolean Whether to turn highlighting for the regular expression group on or off
     * @since 1.0.0
     */
    function set_regexps_highlighting($key, $flag) {
        $this->lexic_permissions['REGEXPS'][$key] = (bool) $flag;
    }

    /**
     * Sets whether a set of keywords are checked for in a case sensitive manner
     *
     * @param int The key of the keyword group to change the case sensitivity of
     * @param boolean Whether to check in a case sensitive manner or not
     * @since 1.0.0
     */
    function set_case_sensitivity($key, $case) {
        $this->language_data['CASE_SENSITIVE'][$key] = (bool) $case;
    }

    /**
     * Sets the case that keywords should use when found. Use the constants:
     *
     *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
     *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
     *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
     *
     * Any other value is ignored and the current setting is kept.
     *
     * @param int A constant specifying what to do with matched keywords
     * @since 1.0.1
     */
    function set_case_keywords($case) {
        $allowed = array(GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER);
        if (in_array($case, $allowed)) {
            $this->language_data['CASE_KEYWORDS'] = $case;
        }
    }

    /**
     * Sets how many spaces a tab is substituted for
     *
     * Widths below one are not accepted; the width is reset to 8 instead
     *
     * @param int The tab width
     * @since 1.0.0
     */
    function set_tab_width($width) {
        $width = intval($width);

        //Fall back to 8 when the value does not fit the constraints
        $this->tab_width = ($width < 1) ? 8 : $width;
    }

    /**
     * Sets whether or not to use tab-stop width specified by language
     *
     * @param boolean Whether to use language-specific tab-stop widths
     * @since 1.0.7.20
     */
    function set_use_language_tab_width($use) {
        $this->use_language_tab_width = (bool) $use;
    }

    /**
     * Returns the tab width to use, based on the current language and user
     * preference: the language's TAB_WIDTH when language-specific widths are
     * enabled and the language defines one, the configured width otherwise
     *
     * @return int Tab width
     * @since 1.0.7.20
     */
    function get_real_tab_width() {
        if ($this->use_language_tab_width && isset($this->language_data['TAB_WIDTH'])) {
            return $this->language_data['TAB_WIDTH'];
        }

        return $this->tab_width;
    }

    /**
     * Enables/disables strict highlighting. Default is off, calling this
     * method without parameters will turn it on. The call only has an effect
     * when the language's STRICT_MODE_APPLIES setting is GESHI_MAYBE. See
     * documentation for more details on strict mode and where to use it.
     *
     * @param boolean Whether to enable strict mode or not
     * @since 1.0.0
     */
    function enable_strict_mode($mode = true) {
        if (GESHI_MAYBE == $this->language_data['STRICT_MODE_APPLIES']) {
            $this->strict_mode = ($mode) ? GESHI_ALWAYS : GESHI_NEVER;
        }
    }

    /**
     * Disables all highlighting
     *
     * @since 1.0.0
     * @todo  Rewrite with array traversal
     * @deprecated In favour of enable_highlighting
     */
    function disable_highlighting() {
        $this->enable_highlighting(false);
    }

    /**
     * Enables all highlighting
     *
     * The optional flag parameter was added in version 1.0.7.21 and can be used
     * to enable (true) or disable (false) all highlighting.
     *
     * @since 1.0.0
     * @param boolean A flag specifying whether to enable or disable all highlighting
     * @todo  Rewrite with array traversal
     */
    function enable_highlighting($flag = true) {
        $flag = (bool) $flag;

        //Permissions are either a single flag or an array of per-group flags
        foreach ($this->lexic_permissions as $key => $value) {
            if (!is_array($value)) {
                $this->lexic_permissions[$key] = $flag;
                continue;
            }
            foreach (array_keys($value) as $k) {
                $this->lexic_permissions[$key][$k] = $flag;
            }
        }

        // Context blocks
        $this->enable_important_blocks = $flag;
    }

    /**
     * Given a file extension, this method returns the name of the first
     * language in the lookup table that lists the extension, or 'text' if no
     * language lists it. The extension is lowercased before the lookup.
     *
     * @param string The extension to get a language name for
     * @param array  A lookup array to use instead of the default one, in the
     *               form 'lang_name' => array('extension', ...); a single
     *               extension may also be given as a plain string
     * @return string The language name, 'text' when nothing matched
     * @since 1.0.5
     * @todo Re-think about how this method works (maybe make it private and/or make it
     *       a extension->lang lookup?)
     * @todo static?
     */
    function get_language_name_from_extension( $extension, $lookup = array() ) {
        $extension = strtolower((string) $extension);

        if ( !is_array($lookup) || empty($lookup)) {
            //NOTE: the upper-case entries below can never match, because the
            //extension has been lowercased above
            $lookup = array(
                '6502acme' => array( 'a', 's', 'asm', 'inc' ),
                '6502tasm' => array( 'a', 's', 'asm', 'inc' ),
                '6502kickass' => array( 'a', 's', 'asm', 'inc' ),
                '68000devpac' => array( 'a', 's', 'asm', 'inc' ),
                'abap' => array('abap'),
                'actionscript' => array('as'),
                'ada' => array('a', 'ada', 'adb', 'ads'),
                'apache' => array('conf'),
                'asm' => array('ash', 'asm', 'inc'),
                'asp' => array('asp'),
                'bash' => array('sh'),
                'bf' => array('bf'),
                'c' => array('c', 'h'),
                'c_mac' => array('c', 'h'),
                'caddcl' => array(),
                'cadlisp' => array(),
                'cdfg' => array('cdfg'),
                'cobol' => array('cbl'),
                'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
                'csharp' => array('cs'),
                'css' => array('css'),
                'd' => array('d'),
                'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
                'diff' => array('diff', 'patch'),
                'dos' => array('bat', 'cmd'),
                'gdb' => array('kcrash', 'crash', 'bt'),
                'gettext' => array('po', 'pot'),
                'gml' => array('gml'),
                'gnuplot' => array('plt'),
                'groovy' => array('groovy'),
                'haskell' => array('hs'),
                'haxe' => array('hx'),
                'html4strict' => array('html', 'htm'),
                'ini' => array('ini', 'desktop'),
                'java' => array('java'),
                'javascript' => array('js'),
                'klonec' => array('kl1'),
                'klonecpp' => array('klx'),
                'latex' => array('tex'),
                'lisp' => array('lisp'),
                'lua' => array('lua'),
                'matlab' => array('m'),
                'mpasm' => array(),
                'mysql' => array('sql'),
                'nsis' => array(),
                'objc' => array(),
                'oobas' => array(),
                'oracle8' => array(),
                'oracle10' => array(),
                'pascal' => array('pas'),
                'perl' => array('pl', 'pm'),
                'php' => array('php', 'php5', 'phtml', 'phps'),
                'povray' => array('pov'),
                'providex' => array('pvc', 'pvx'),
                'prolog' => array('pl'),
                'python' => array('py'),
                'qbasic' => array('bi'),
                'reg' => array('reg'),
                'ruby' => array('rb'),
                'sas' => array('sas'),
                'scala' => array('scala'),
                'scheme' => array('scm'),
                'scilab' => array('sci'),
                'smalltalk' => array('st'),
                'smarty' => array(),
                'tcl' => array('tcl'),
                'text' => array('txt'),
                'vb' => array('bas'),
                'vbnet' => array(),
                'visualfoxpro' => array(),
                'whitespace' => array('ws'),
                'xml' => array('xml', 'svg', 'xrc'),
                'z80' => array('z80', 'asm', 'inc')
                );
        }

        foreach ($lookup as $lang => $extensions) {
            //The cast keeps a caller supplied table with a non-array entry
            //from making in_array() fail
            if (in_array($extension, (array) $extensions)) {
                return $lang;
            }
        }

        return 'text';
    }

    /**
     * Given a file name, this method loads its contents in, and attempts
     * to set the language automatically. An optional lookup table can be
     * passed for looking up the language name.
If not specified a default
     * table is used
     *
     * The language table is in the form
     * <pre>array(
     *   'lang_name' => array('extension', 'extension', ...),
     *   'lang_name' ...
     * );</pre>
     *
     * If the file cannot be read, the error property is set to
     * GESHI_ERROR_FILE_NOT_READABLE and source and language are left alone.
     *
     * @param string The filename to load the source from
     * @param array  A lookup array to use instead of the default one
     * @todo Complete rethink of this and above method
     * @since 1.0.5
     */
    function load_from_file($file_name, $lookup = array()) {
        if (!is_readable($file_name)) {
            $this->error = GESHI_ERROR_FILE_NOT_READABLE;
            return;
        }

        //is_readable() can pass while the read itself still fails (for
        //instance on a directory), so the result is checked as well
        $contents = file_get_contents($file_name);
        if (false === $contents) {
            $this->error = GESHI_ERROR_FILE_NOT_READABLE;
            return;
        }

        $this->set_source($contents);

        //pathinfo() only looks at the last path component, so a dot in a
        //directory name is not mistaken for the start of the extension
        $extension = pathinfo($file_name, PATHINFO_EXTENSION);
        $this->set_language($this->get_language_name_from_extension($extension, $lookup));
    }

    /**
     * Adds a keyword to a keyword group for highlighting. A group that does
     * not exist yet is created; a word already in the group is not added
     * again.
     *
     * @param int    The key of the keyword group to add the keyword to
     * @param string The word to add to the keyword group
     * @since 1.0.0
     */
    function add_keyword($key, $word) {
        if (!isset($this->language_data['KEYWORDS'][$key])
            || !is_array($this->language_data['KEYWORDS'][$key])) {
            $this->language_data['KEYWORDS'][$key] = array();
        }

        if (in_array($word, $this->language_data['KEYWORDS'][$key])) {
            return;
        }
        $this->language_data['KEYWORDS'][$key][] = $word;

        //The regexp cache only needs attention once it has been built
        if (!$this->parse_cache_built) {
            return;
        }

        if (!empty($this->language_data['CACHED_KEYWORD_LISTS'][$key])) {
            //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
            $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
            $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
        } elseif (!isset($this->lexic_permissions['KEYWORDS'][$key])
            || $this->lexic_permissions['KEYWORDS'][$key]) {
            //No cached list to append to: compile one, using the same
            //permission rule build_parse_cache() applies to keyword groups
            $this->optimize_keyword_group($key);
        }
    }

    /**
     * Removes a keyword from a keyword group
     *
     * @param int    The key of the keyword group to remove the keyword from
     * @param string The word to remove from the keyword group
     * @param bool   Whether to automatically recompile the optimized regexp list or not.
*               Note: if you set this to false and @see GeSHi->parse_code() was already called once,
     *               for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
     *               or the removed keyword will stay in cache and still be highlighted! On the other hand
     *               it might be too expensive to recompile the regexp list for every removal if you want to
     *               remove a lot of keywords.
     * @since 1.0.0
     */
    function remove_keyword($key, $word, $recompile = true) {
        //An unknown keyword group has nothing to remove; searching it would
        //hand a non-array to array_search()
        if (!isset($this->language_data['KEYWORDS'][$key])
            || !is_array($this->language_data['KEYWORDS'][$key])) {
            return;
        }

        $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key]);
        if ($key_to_remove === false) {
            return;
        }
        unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);

        //NEW in 1.0.8, optionally recompile keyword group
        if ($recompile && $this->parse_cache_built) {
            $this->optimize_keyword_group($key);
        }
    }

    /**
     * Creates a new keyword group
     *
     * @param int    The key of the keyword group to create
     * @param string The styles for the keyword group
     * @param boolean Whether the keyword group is case sensitive or not
     * @param array  The words to use for the keyword group
     * @return boolean false if the word list is empty and no group was
     *                 created, true otherwise
     * @since 1.0.0
     */
    function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
        $words = (array) $words;
        if (empty($words)) {
            // empty word lists mess up highlighting
            return false;
        }

        //Add the new keyword group internally
        $this->language_data['KEYWORDS'][$key] = $words;
        $this->lexic_permissions['KEYWORDS'][$key] = true;
        $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
        $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;

        //NEW in 1.0.8, cache keyword regexp
        if ($this->parse_cache_built) {
            $this->optimize_keyword_group($key);
        }

        //Report success explicitly, as the failure path returns false
        return true;
    }

    /**
     * Removes a keyword group
     *
     * @param int    The key of the keyword group to remove
     * @since 1.0.0
     */
    function remove_keyword_group ($key) {
        //Remove the keyword group internally
        unset($this->language_data['KEYWORDS'][$key]);
        unset($this->lexic_permissions['KEYWORDS'][$key]);
        unset($this->language_data['CASE_SENSITIVE'][$key]);
        unset($this->language_data['STYLES']['KEYWORDS'][$key]);

        //NEW in 1.0.8
        unset($this->language_data['CACHED_KEYWORD_LISTS'][$key]);
    }

    /**
     * compile optimized regexp list for keyword group
     *
     * The compiled list is stored under CACHED_KEYWORD_LISTS. When the
     * language's PARSER_CONTROL enables SPACE_AS_WHITESPACE for keywords
     * (generally or for this group), every space in the compiled patterns is
     * replaced by \s+.
     *
     * @param int   The key of the keyword group to compile & optimize
     * @since 1.0.8
     */
    function optimize_keyword_group($key) {
        $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
            $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);

        //The general setting applies first, a group specific one overrides
        //it. isset() on the full path already covers missing parent keys.
        $space_as_whitespace = false;
        if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
            $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
        }
        if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
            $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
        }

        if ($space_as_whitespace) {
            foreach ($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $rxk => $rxv) {
                $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk] =
                    str_replace(" ", "\\s+", $rxv);
            }
        }
    }

    /**
     * Sets the content of the header block
     *
     * @param string The content of the header block
     * @since 1.0.2
     */
    function set_header_content($content) {
        $this->header_content = $content;
    }

    /**
     * Sets the content of the footer block
     *
     * @param
string The content of the footer block
     * @since 1.0.2
     */
    function set_footer_content($content) {
        $this->footer_content = $content;
    }

    /**
     * Stores the style to use for the header content
     *
     * @param string The style for the header content
     * @since 1.0.2
     */
    function set_header_content_style($style) {
        $this->header_content_style = $style;
    }

    /**
     * Stores the style to use for the footer content
     *
     * @param string The style for the footer content
     * @since 1.0.2
     */
    function set_footer_content_style($style) {
        $this->footer_content_style = $style;
    }

    /**
     * Chooses whether a surrounding block is forced around
     * the highlighted code or not
     *
     * @param boolean Tells whether to enable or disable this feature
     * @since 1.0.7.20
     */
    function enable_inner_code_block($flag) {
        $this->force_code_block = $flag ? true : false;
    }

    /**
     * Stores the base URL to be used for the keywords of one group
     *
     * @param int The key of the keyword group to set the URL for
     * @param string The URL to set for the group. If {FNAME} is in
     *               the url somewhere, it is replaced by the keyword
     *               that the URL is being made for
     * @since 1.0.2
     */
    function set_url_for_keyword_group($group, $url) {
        $this->language_data['URLS'][$group] = $url;
    }

    /**
     * Stores the styles for links in code, one entry per link state
     *
     * @param int A constant that specifies what state the style is being
     *            set for - e.g. :hover or :visited
     * @param string The styles to use for that state
     * @since 1.0.2
     */
    function set_link_styles($type, $styles) {
        $this->link_styles[$type] = $styles;
    }

    /**
     * Sets the target for links in code
     *
     * @param string The target for links in the code, e.g.
_blank
     * @since 1.0.3
     */
    function set_link_target($target) {
        //A false-like target clears the attribute, anything else is stored
        //as a ready-made target attribute with a leading space
        $this->link_target = $target ? ' target="' . $target . '"' : '';
    }

    /**
     * Stores the styles for important parts of the code
     *
     * @param string The styles to use on important parts of the code
     * @since 1.0.2
     */
    function set_important_styles($styles) {
        $this->important_styles = $styles;
    }

    /**
     * Switches highlighting of context-important blocks on or off
     *
     * @param boolean Tells whether to enable or disable highlighting of important blocks
     * @todo REMOVE THIS SHIZ FROM GESHI!
     * @deprecated
     * @since 1.0.2
     */
    function enable_important_blocks($flag) {
        $this->enable_important_blocks = (bool) $flag;
    }

    /**
     * Switches the adding of CSS IDs to each line on or off
     *
     * @param boolean If true, IDs will be added to each line.
     * @since 1.0.2
     */
    function enable_ids($flag = true) {
        $this->add_ids = (bool) $flag;
    }

    /**
     * Specifies which lines to highlight extra
     *
     * The extra style parameter was added in 1.0.7.21.
     *
     * @param mixed An array of line numbers to highlight, or just a line
     *              number on its own.
     * @param string A string specifying the style to use for this line.
     *              If null is specified, the default style is used.
*              If false is specified, the line will be removed from
     *              special highlighting
     * @since 1.0.2
     * @todo  Some data replication here that could be cut down on
     */
    function highlight_lines_extra($lines, $style = null) {
        //A list of lines is handled one line at a time
        if (is_array($lines)) {
            foreach ($lines as $line) {
                $this->highlight_lines_extra($line, $style);
            }
            return;
        }

        $line_number = intval($lines);

        //false drops the line from special highlighting altogether
        if ($style === false) {
            unset($this->highlight_extra_lines[$line_number]);
            unset($this->highlight_extra_lines_styles[$line_number]);
            return;
        }

        //Mark the line as being highlighted specially
        $this->highlight_extra_lines[$line_number] = $line_number;

        //null means no per-line style is kept for this line
        if ($style === null) {
            unset($this->highlight_extra_lines_styles[$line_number]);
        } else {
            $this->highlight_extra_lines_styles[$line_number] = $style;
        }
    }

    /**
     * Stores the style for extra-highlighted lines
     *
     * @param string The style for extra-highlighted lines
     * @since 1.0.2
     */
    function set_highlight_lines_extra_style($styles) {
        $this->highlight_extra_lines_style = $styles;
    }

    /**
     * Stores the line-ending, converted to a string
     *
     * @param string The new line-ending
     * @since 1.0.2
     */
    function set_line_ending($line_ending) {
        $as_string = (string) $line_ending;
        $this->line_ending = $as_string;
    }

    /**
     * Sets what number line numbers should start at. Should
     * be a positive integer, and will be converted to one.
     *
     * <b>Warning:</b> Using this method will add the "start"
     * attribute to the &lt;ol&gt; that is used for line numbering.
     * This is <b>not</b> valid XHTML strict, so if that's what you
     * care about then don't use this method.
Firefox is getting
     * support for the CSS method of doing this in 1.1 and Opera
     * has support for the CSS method, but (of course) IE doesn't
     * so it's not worth doing it the CSS way yet.
     *
     * @param int The number to start line numbers at
     * @since 1.0.2
     */
    function start_line_numbers_at($number) {
        //Integer conversion first, then the sign is dropped
        $start = intval($number);
        $this->line_numbers_start = abs($start);
    }

    /**
     * Sets the encoding used for htmlspecialchars(), for international
     * support. The name is stored in lower case; an empty value is ignored.
     *
     * NOTE: This is not needed for now because htmlspecialchars() is not
     * being used (it has a security hole in PHP4 that has not been patched).
     * Maybe in a future version it may make a return for speed reasons, but
     * I doubt it.
     *
     * @param string The encoding to use for the source
     * @since 1.0.3
     */
    function set_encoding($encoding) {
        if (!$encoding) {
            return;
        }
        $this->encoding = strtolower($encoding);
    }

    /**
     * Turns linking of keywords on or off.
     *
     * @param boolean If true, links will be added to keywords
     * @since 1.0.2
     */
    function enable_keyword_links($enable = true) {
        $this->keyword_links = $enable ? true : false;
    }

    /**
     * Setup caches needed for styling. This is automatically called in
     * parse_code() and get_stylesheet() when appropriate.
This function helps
     * stylesheet generators as they rely on some style information being
     * preprocessed
     *
     * @since 1.0.8
     * @access private
     */
    function build_style_cache() {
        //The style cache is only needed to highlight numbers
        if (!$this->lexic_permissions['NUMBERS']) {
            return;
        }

        //First check what way highlighting information for numbers are given
        if (!isset($this->language_data['NUMBERS'])) {
            $this->language_data['NUMBERS'] = 0;
        }

        //An array is taken over as the cache unchanged
        if (is_array($this->language_data['NUMBERS'])) {
            $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
            return;
        }

        $this->language_data['NUMBERS_CACHE'] = array();

        //No flags given: use basic integers and non-scientific floats
        if (!$this->language_data['NUMBERS']) {
            $this->language_data['NUMBERS'] =
                GESHI_NUMBER_INT_BASIC |
                GESHI_NUMBER_FLT_NONSCI;
        }

        //Walk the flags bit by bit, lowest bit first
        $remaining = $this->language_data['NUMBERS'];
        for ($bit = 0; $remaining > 0; ++$bit, $remaining >>= 1) {
            $mask = 1 << $bit;

            //Rearrange style indices if required: a style stored under the
            //flag value is moved to the bit position
            if (isset($this->language_data['STYLES']['NUMBERS'][$mask])) {
                $this->language_data['STYLES']['NUMBERS'][$bit] =
                    $this->language_data['STYLES']['NUMBERS'][$mask];
                unset($this->language_data['STYLES']['NUMBERS'][$mask]);
            }

            //Skip bits that are not set for highlighting
            if (!($remaining & 1)) {
                continue;
            }

            //A bit with a style of its own gets its own cache entry,
            //all other bits are collected in group 0
            if (isset($this->language_data['STYLES']['NUMBERS'][$bit])) {
                $this->language_data['NUMBERS_CACHE'][$bit] = $mask;
            } else {
                if (!isset($this->language_data['NUMBERS_CACHE'][0])) {
                    $this->language_data['NUMBERS_CACHE'][0] = 0;
                }
                $this->language_data['NUMBERS_CACHE'][0] |= $mask;
            }
        }
    }

    /**
     * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
     * This function makes stylesheet generators much faster as they do not need these caches.
*
     * Prepares the following entries of $this->language_data:
     *  - MULTIPLE_SYMBOL_GROUPS, SYMBOL_DATA and SYMBOL_SEARCH, but only when
     *    symbol highlighting is permitted and the language defines symbols;
     *  - CACHED_KEYWORD_LISTS, which is reset here before
     *    optimize_keyword_group() is called for every keyword group that is
     *    not explicitly disabled;
     *  - CACHE_BRACKET_MATCH and CACHE_BRACKET_REPLACE when bracket
     *    highlighting is permitted;
     *  - NUMBERS_RXCACHE and, if missing, the default
     *    PARSER_CONTROL/NUMBERS/PRECHECK_RX when number highlighting is
     *    permitted (build_style_cache() is run first if NUMBERS_CACHE is not
     *    available yet).
     * Finally $this->parse_cache_built is set to true.
     *
     * @since 1.0.8
     * @access private
     */
    function build_parse_cache() {
        // cache symbol regexp
        //As this is a costly operation, we avoid doing it for multiple groups ...
        //Instead we perform it for all symbols at once.
        //
        //For this to work, we need to reorganize the data arrays.
        if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
            //count() on a non-array warns since PHP 7.2 and throws a TypeError
            //since PHP 8.0, so only count when the style table really is an array.
            $this->language_data['MULTIPLE_SYMBOL_GROUPS'] =
                isset($this->language_data['STYLES']['SYMBOLS']) &&
                is_array($this->language_data['STYLES']['SYMBOLS']) &&
                count($this->language_data['STYLES']['SYMBOLS']) > 1;

            $this->language_data['SYMBOL_DATA'] = array();
            $symbol_preg_multi = array(); // multi char symbols
            $symbol_preg_single = array(); // single char symbols
            foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
                //A nested array is a symbol group styled by its own key;
                //a bare symbol belongs to the default style group 0.
                if (is_array($symbols)) {
                    $style_group = $key;
                } else {
                    $style_group = 0;
                    $symbols = array($symbols);
                }

                foreach ($symbols as $sym) {
                    $sym = $this->hsc($sym);
                    if (isset($this->language_data['SYMBOL_DATA'][$sym])) {
                        //The first definition of a symbol wins
                        continue;
                    }
                    $this->language_data['SYMBOL_DATA'][$sym] = $style_group;

                    if (isset($sym[1])) { // multiple chars
                        $symbol_preg_multi[] = preg_quote($sym, '/');
                    } elseif ($sym == '-') {
                        // don't trigger range out of order error
                        $symbol_preg_single[] = '\-';
                    } else { // single char
                        $symbol_preg_single[] = preg_quote($sym, '/');
                    }
                }
            }

            //Now we have an array with each possible symbol as the key and the style as the actual data.
            //This way we can set the correct style just the moment we highlight ...
            //
            //Now we need to rewrite our array to get a search string that
            //matches any of the known symbols: the multi char symbols as
            //alternatives (reverse sorted, so a longer symbol precedes its own
            //prefix), followed by one character class for the single char ones.
            $symbol_preg = array();
            if (!empty($symbol_preg_multi)) {
                rsort($symbol_preg_multi);
                $symbol_preg[] = implode('|', $symbol_preg_multi);
            }
            if (!empty($symbol_preg_single)) {
                rsort($symbol_preg_single);
                $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
            }
            $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
        }

        // cache optimized regexp for keyword matching
        // remove old cache
        $this->language_data['CACHED_KEYWORD_LISTS'] = array();
        foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
            // groups without an explicit permission entry count as enabled
            if (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
                    $this->lexic_permissions['KEYWORDS'][$key]) {
                $this->optimize_keyword_group($key);
            }
        }

        // brackets
        if ($this->lexic_permissions['BRACKETS']) {
            $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');

            // Inline style if classes are off and a bracket style exists,
            // the CSS class otherwise.
            if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
                $bracket_attributes = ' style="' . $this->language_data['STYLES']['BRACKETS'][0] . '"';
            } else {
                $bracket_attributes = ' class="br0"';
            }

            // One replacement per entry of CACHE_BRACKET_MATCH, in the same
            // order, each bracket written as its numeric HTML entity.
            $this->language_data['CACHE_BRACKET_REPLACE'] = array();
            foreach (array('&#91;', '&#93;', '&#40;', '&#41;', '&#123;', '&#125;') as $bracket_entity) {
                $this->language_data['CACHE_BRACKET_REPLACE'][] =
                    '<|' . $bracket_attributes . '>' . $bracket_entity . '|>';
            }
        }

        //Build the parse cache needed to highlight numbers appropriate
        if($this->lexic_permissions['NUMBERS']) {
            //Check if the style rearrangements have been processed ...
            //This also does some preprocessing to check which style groups are useable ...
            if(!isset($this->language_data['NUMBERS_CACHE'])) {
                $this->build_style_cache();
            }

            //Number format specification
            //All this formats are matched case-insensitively!
            static $numbers_format = array(
                GESHI_NUMBER_INT_BASIC =>
                    '(?:(?<![0-9a-z_\.%$@])|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_INT_CSTYLE =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_BIN_SUFFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_BIN_PREFIX_PERCENT =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_BIN_PREFIX_0B =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_OCT_PREFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_OCT_PREFIX_0O =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_OCT_PREFIX_AT =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])\@[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_OCT_SUFFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_HEX_PREFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_HEX_PREFIX_DOLLAR =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\$[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_HEX_SUFFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_FLT_NONSCI =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_FLT_NONSCI_F =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_FLT_SCI_SHORT =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_FLT_SCI_ZERO =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
                );

            //At this step we have an associative array with flag groups for a
            //specific style or an string denoting a regexp given its index.
            $this->language_data['NUMBERS_RXCACHE'] = array();
            foreach($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
                if(is_string($rxdata)) {
                    $regexp = $rxdata;
                } else {
                    //This is a bitfield of number flags to highlight:
                    //Build an array, implode them together and make this the actual RX
                    $rxuse = array();
                    //The $i > 0 test stops the loop should the shift ever overflow.
                    for($i = 1; $i > 0 && $i <= $rxdata; $i<<=1) {
                        //Flags without a known format are skipped: using them would
                        //raise an undefined index notice and add an empty alternative
                        //(matching the empty string) to the regexp.
                        if(($rxdata & $i) && isset($numbers_format[$i])) {
                            $rxuse[] = $numbers_format[$i];
                        }
                    }
                    $regexp = implode("|", $rxuse);
                }

                $this->language_data['NUMBERS_RXCACHE'][$key] =
                    "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|(?>[^\<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i"; //
            }

            //Default pre-check: only look for numbers if there is a digit at all
            if(!isset($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'])) {
                $this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'] = '#\d#';
            }
        }

        $this->parse_cache_built = true;
    }

    /**
     * Returns the code in $this->source, highlighted and surrounded by the
     * nessecary HTML.
     *
     * This should only be called ONCE, cos it's SLOW! If you want to highlight
     * the same source multiple times, you're better off doing a whole lot of
     * str_replaces to replace the &lt;span&gt;s
     *
     * @since 1.0.0
     */
    function parse_code () {
        // Start the timer
        $start_time = microtime();

        // Replace all newlines to a common form.
        $code = str_replace("\r\n", "\n", $this->source);
        $code = str_replace("\r", "\n", $code);

        // Firstly, if there is an error, we won't highlight
        if ($this->error) {
            //Escape the source for output
            $result = $this->hsc($this->source);

            //This fix is related to SF#1923020, but has to be applied regardless of
            //actually highlighting symbols.
2171 $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result); 2172 2173 // Timing is irrelevant 2174 $this->set_time($start_time, $start_time); 2175 $this->finalise($result); 2176 return $result; 2177 } 2178 2179 // make sure the parse cache is up2date 2180 if (!$this->parse_cache_built) { 2181 $this->build_parse_cache(); 2182 } 2183 2184 // Initialise various stuff 2185 $length = strlen($code); 2186 $COMMENT_MATCHED = false; 2187 $stuff_to_parse = ''; 2188 $endresult = ''; 2189 2190 // "Important" selections are handled like multiline comments 2191 // @todo GET RID OF THIS SHIZ 2192 if ($this->enable_important_blocks) { 2193 $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT; 2194 } 2195 2196 if ($this->strict_mode) { 2197 // Break the source into bits. Each bit will be a portion of the code 2198 // within script delimiters - for example, HTML between < and > 2199 $k = 0; 2200 $parts = array(); 2201 $matches = array(); 2202 $next_match_pointer = null; 2203 // we use a copy to unset delimiters on demand (when they are not found) 2204 $delim_copy = $this->language_data['SCRIPT_DELIMITERS']; 2205 $i = 0; 2206 while ($i < $length) { 2207 $next_match_pos = $length + 1; // never true 2208 foreach ($delim_copy as $dk => $delimiters) { 2209 if(is_array($delimiters)) { 2210 foreach ($delimiters as $open => $close) { 2211 // make sure the cache is setup properly 2212 if (!isset($matches[$dk][$open])) { 2213 $matches[$dk][$open] = array( 2214 'next_match' => -1, 2215 'dk' => $dk, 2216 2217 'open' => $open, // needed for grouping of adjacent code blocks (see below) 2218 'open_strlen' => strlen($open), 2219 2220 'close' => $close, 2221 'close_strlen' => strlen($close), 2222 ); 2223 } 2224 // Get the next little bit for this opening string 2225 if ($matches[$dk][$open]['next_match'] < $i) { 2226 // only find the next pos if it was not already cached 2227 $open_pos = strpos($code, $open, $i); 2228 if ($open_pos === false) { 2229 
// no match for this delimiter ever 2230 unset($delim_copy[$dk][$open]); 2231 continue; 2232 } 2233 $matches[$dk][$open]['next_match'] = $open_pos; 2234 } 2235 if ($matches[$dk][$open]['next_match'] < $next_match_pos) { 2236 //So we got a new match, update the close_pos 2237 $matches[$dk][$open]['close_pos'] = 2238 strpos($code, $close, $matches[$dk][$open]['next_match']+1); 2239 2240 $next_match_pointer =& $matches[$dk][$open]; 2241 $next_match_pos = $matches[$dk][$open]['next_match']; 2242 } 2243 } 2244 } else { 2245 //So we should match an RegExp as Strict Block ... 2246 /** 2247 * The value in $delimiters is expected to be an RegExp 2248 * containing exactly 2 matching groups: 2249 * - Group 1 is the opener 2250 * - Group 2 is the closer 2251 */ 2252 if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work. 2253 preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) { 2254 //We got a match ... 2255 if(isset($matches_rx['start']) && isset($matches_rx['end'])) 2256 { 2257 $matches[$dk] = array( 2258 'next_match' => $matches_rx['start'][1], 2259 'dk' => $dk, 2260 2261 'close_strlen' => strlen($matches_rx['end'][0]), 2262 'close_pos' => $matches_rx['end'][1], 2263 ); 2264 } else { 2265 $matches[$dk] = array( 2266 'next_match' => $matches_rx[1][1], 2267 'dk' => $dk, 2268 2269 'close_strlen' => strlen($matches_rx[2][0]), 2270 'close_pos' => $matches_rx[2][1], 2271 ); 2272 } 2273 } else { 2274 // no match for this delimiter ever 2275 unset($delim_copy[$dk]); 2276 continue; 2277 } 2278 2279 if ($matches[$dk]['next_match'] <= $next_match_pos) { 2280 $next_match_pointer =& $matches[$dk]; 2281 $next_match_pos = $matches[$dk]['next_match']; 2282 } 2283 } 2284 } 2285 2286 // non-highlightable text 2287 $parts[$k] = array( 2288 1 => substr($code, $i, $next_match_pos - $i) 2289 ); 2290 ++$k; 2291 2292 if ($next_match_pos > $length) { 2293 // out of bounds means no next match was found 2294 break; 2295 } 2296 2297 // 
highlightable code 2298 $parts[$k][0] = $next_match_pointer['dk']; 2299 2300 //Only combine for non-rx script blocks 2301 if(is_array($delim_copy[$next_match_pointer['dk']])) { 2302 // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three! 2303 $i = $next_match_pos + $next_match_pointer['open_strlen']; 2304 while (true) { 2305 $close_pos = strpos($code, $next_match_pointer['close'], $i); 2306 if ($close_pos == false) { 2307 break; 2308 } 2309 $i = $close_pos + $next_match_pointer['close_strlen']; 2310 if ($i == $length) { 2311 break; 2312 } 2313 if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 || 2314 substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) { 2315 // merge adjacent but make sure we don't merge things like <tag><!-- comment --> 2316 foreach ($matches as $submatches) { 2317 foreach ($submatches as $match) { 2318 if ($match['next_match'] == $i) { 2319 // a different block already matches here! 2320 break 3; 2321 } 2322 } 2323 } 2324 } else { 2325 break; 2326 } 2327 } 2328 } else { 2329 $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen']; 2330 $i = $close_pos; 2331 } 2332 2333 if ($close_pos === false) { 2334 // no closing delimiter found! 2335 $parts[$k][1] = substr($code, $next_match_pos); 2336 ++$k; 2337 break; 2338 } else { 2339 $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos); 2340 ++$k; 2341 } 2342 } 2343 unset($delim_copy, $next_match_pointer, $next_match_pos, $matches); 2344 $num_parts = $k; 2345 2346 if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) { 2347 // when we have only one part, we don't have anything to highlight at all. 
2348 // if we have a "maybe" strict language, this should be handled as highlightable code 2349 $parts = array( 2350 0 => array( 2351 0 => '', 2352 1 => '' 2353 ), 2354 1 => array( 2355 0 => null, 2356 1 => $parts[0][1] 2357 ) 2358 ); 2359 $num_parts = 2; 2360 } 2361 2362 } else { 2363 // Not strict mode - simply dump the source into 2364 // the array at index 1 (the first highlightable block) 2365 $parts = array( 2366 0 => array( 2367 0 => '', 2368 1 => '' 2369 ), 2370 1 => array( 2371 0 => null, 2372 1 => $code 2373 ) 2374 ); 2375 $num_parts = 2; 2376 } 2377 2378 //Unset variables we won't need any longer 2379 unset($code); 2380 2381 //Preload some repeatedly used values regarding hardquotes ... 2382 $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false; 2383 $hq_strlen = strlen($hq); 2384 2385 //Preload if line numbers are to be generated afterwards 2386 //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398 2387 $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS || 2388 !empty($this->highlight_extra_lines) || !$this->allow_multiline_span; 2389 2390 //preload the escape char for faster checking ... 
2391 $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']); 2392 2393 // this is used for single-line comments 2394 $sc_disallowed_before = ""; 2395 $sc_disallowed_after = ""; 2396 2397 if (isset($this->language_data['PARSER_CONTROL'])) { 2398 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) { 2399 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) { 2400 $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE']; 2401 } 2402 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) { 2403 $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER']; 2404 } 2405 } 2406 } 2407 2408 //Fix for SF#1932083: Multichar Quotemarks unsupported 2409 $is_string_starter = array(); 2410 if ($this->lexic_permissions['STRINGS']) { 2411 foreach ($this->language_data['QUOTEMARKS'] as $quotemark) { 2412 if (!isset($is_string_starter[$quotemark[0]])) { 2413 $is_string_starter[$quotemark[0]] = (string)$quotemark; 2414 } elseif (is_string($is_string_starter[$quotemark[0]])) { 2415 $is_string_starter[$quotemark[0]] = array( 2416 $is_string_starter[$quotemark[0]], 2417 $quotemark); 2418 } else { 2419 $is_string_starter[$quotemark[0]][] = $quotemark; 2420 } 2421 } 2422 } 2423 2424 // Now we go through each part. We know that even-indexed parts are 2425 // code that shouldn't be highlighted, and odd-indexed parts should 2426 // be highlighted 2427 for ($key = 0; $key < $num_parts; ++$key) { 2428 $STRICTATTRS = ''; 2429 2430 // If this block should be highlighted... 
2431 if (!($key & 1)) { 2432 // Else not a block to highlight 2433 $endresult .= $this->hsc($parts[$key][1]); 2434 unset($parts[$key]); 2435 continue; 2436 } 2437 2438 $result = ''; 2439 $part = $parts[$key][1]; 2440 2441 $highlight_part = true; 2442 if ($this->strict_mode && !is_null($parts[$key][0])) { 2443 // get the class key for this block of code 2444 $script_key = $parts[$key][0]; 2445 $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key]; 2446 if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' && 2447 $this->lexic_permissions['SCRIPT']) { 2448 // Add a span element around the source to 2449 // highlight the overall source block 2450 if (!$this->use_classes && 2451 $this->language_data['STYLES']['SCRIPT'][$script_key] != '') { 2452 $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"'; 2453 } else { 2454 $attributes = ' class="sc' . $script_key . '"'; 2455 } 2456 $result .= "<span$attributes>"; 2457 $STRICTATTRS = $attributes; 2458 } 2459 } 2460 2461 if ($highlight_part) { 2462 // Now, highlight the code in this block. This code 2463 // is really the engine of GeSHi (along with the method 2464 // parse_non_string_part). 
2465 2466 // cache comment regexps incrementally 2467 $next_comment_regexp_key = ''; 2468 $next_comment_regexp_pos = -1; 2469 $next_comment_multi_pos = -1; 2470 $next_comment_single_pos = -1; 2471 $comment_regexp_cache_per_key = array(); 2472 $comment_multi_cache_per_key = array(); 2473 $comment_single_cache_per_key = array(); 2474 $next_open_comment_multi = ''; 2475 $next_comment_single_key = ''; 2476 $escape_regexp_cache_per_key = array(); 2477 $next_escape_regexp_key = ''; 2478 $next_escape_regexp_pos = -1; 2479 2480 $length = strlen($part); 2481 for ($i = 0; $i < $length; ++$i) { 2482 // Get the next char 2483 $char = $part[$i]; 2484 $char_len = 1; 2485 2486 // update regexp comment cache if needed 2487 if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) { 2488 $next_comment_regexp_pos = $length; 2489 foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) { 2490 $match_i = false; 2491 if (isset($comment_regexp_cache_per_key[$comment_key]) && 2492 ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i || 2493 $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) { 2494 // we have already matched something 2495 if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) { 2496 // this comment is never matched 2497 continue; 2498 } 2499 $match_i = $comment_regexp_cache_per_key[$comment_key]['pos']; 2500 } elseif ( 2501 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible 2502 (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) || 2503 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i)) 2504 ) { 2505 $match_i = $match[0][1]; 2506 if (GESHI_PHP_PRE_433) { 2507 $match_i += $i; 2508 } 2509 2510 $comment_regexp_cache_per_key[$comment_key] = array( 2511 'key' => $comment_key, 2512 'length' => strlen($match[0][0]), 2513 'pos' => $match_i 2514 ); 2515 } else { 2516 
$comment_regexp_cache_per_key[$comment_key]['pos'] = false; 2517 continue; 2518 } 2519 2520 if ($match_i !== false && $match_i < $next_comment_regexp_pos) { 2521 $next_comment_regexp_pos = $match_i; 2522 $next_comment_regexp_key = $comment_key; 2523 if ($match_i === $i) { 2524 break; 2525 } 2526 } 2527 } 2528 } 2529 2530 $string_started = false; 2531 2532 if (isset($is_string_starter[$char])) { 2533 // Possibly the start of a new string ... 2534 2535 //Check which starter it was ... 2536 //Fix for SF#1932083: Multichar Quotemarks unsupported 2537 if (is_array($is_string_starter[$char])) { 2538 $char_new = ''; 2539 foreach ($is_string_starter[$char] as $testchar) { 2540 if ($testchar === substr($part, $i, strlen($testchar)) && 2541 strlen($testchar) > strlen($char_new)) { 2542 $char_new = $testchar; 2543 $string_started = true; 2544 } 2545 } 2546 if ($string_started) { 2547 $char = $char_new; 2548 } 2549 } else { 2550 $testchar = $is_string_starter[$char]; 2551 if ($testchar === substr($part, $i, strlen($testchar))) { 2552 $char = $testchar; 2553 $string_started = true; 2554 } 2555 } 2556 $char_len = strlen($char); 2557 } 2558 2559 if ($string_started && ($i != $next_comment_regexp_pos)) { 2560 // Hand out the correct style information for this string 2561 $string_key = array_search($char, $this->language_data['QUOTEMARKS']); 2562 if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) || 2563 !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) { 2564 $string_key = 0; 2565 } 2566 2567 // parse the stuff before this 2568 $result .= $this->parse_non_string_part($stuff_to_parse); 2569 $stuff_to_parse = ''; 2570 2571 if (!$this->use_classes) { 2572 $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"'; 2573 } else { 2574 $string_attributes = ' class="st'.$string_key.'"'; 2575 } 2576 2577 // now handle the string 2578 $string = "<span$string_attributes>" . 
GeSHi::hsc($char); 2579 $start = $i + $char_len; 2580 $string_open = true; 2581 2582 if(empty($this->language_data['ESCAPE_REGEXP'])) { 2583 $next_escape_regexp_pos = $length; 2584 } 2585 2586 do { 2587 //Get the regular ending pos ... 2588 $close_pos = strpos($part, $char, $start); 2589 if(false === $close_pos) { 2590 $close_pos = $length; 2591 } 2592 2593 if($this->lexic_permissions['ESCAPE_CHAR']) { 2594 // update escape regexp cache if needed 2595 if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) { 2596 $next_escape_regexp_pos = $length; 2597 foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) { 2598 $match_i = false; 2599 if (isset($escape_regexp_cache_per_key[$escape_key]) && 2600 ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start || 2601 $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) { 2602 // we have already matched something 2603 if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) { 2604 // this comment is never matched 2605 continue; 2606 } 2607 $match_i = $escape_regexp_cache_per_key[$escape_key]['pos']; 2608 } elseif ( 2609 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible 2610 (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) || 2611 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start)) 2612 ) { 2613 $match_i = $match[0][1]; 2614 if (GESHI_PHP_PRE_433) { 2615 $match_i += $start; 2616 } 2617 2618 $escape_regexp_cache_per_key[$escape_key] = array( 2619 'key' => $escape_key, 2620 'length' => strlen($match[0][0]), 2621 'pos' => $match_i 2622 ); 2623 } else { 2624 $escape_regexp_cache_per_key[$escape_key]['pos'] = false; 2625 continue; 2626 } 2627 2628 if ($match_i !== false && $match_i < $next_escape_regexp_pos) { 2629 $next_escape_regexp_pos = $match_i; 2630 $next_escape_regexp_key = $escape_key; 2631 if ($match_i === 
$start) { 2632 break; 2633 } 2634 } 2635 } 2636 } 2637 2638 //Find the next simple escape position 2639 if('' != $this->language_data['ESCAPE_CHAR']) { 2640 $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start); 2641 if(false === $simple_escape) { 2642 $simple_escape = $length; 2643 } 2644 } else { 2645 $simple_escape = $length; 2646 } 2647 } else { 2648 $next_escape_regexp_pos = $length; 2649 $simple_escape = $length; 2650 } 2651 2652 if($simple_escape < $next_escape_regexp_pos && 2653 $simple_escape < $length && 2654 $simple_escape < $close_pos) { 2655 //The nexxt escape sequence is a simple one ... 2656 $es_pos = $simple_escape; 2657 2658 //Add the stuff not in the string yet ... 2659 $string .= $this->hsc(substr($part, $start, $es_pos - $start)); 2660 2661 //Get the style for this escaped char ... 2662 if (!$this->use_classes) { 2663 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"'; 2664 } else { 2665 $escape_char_attributes = ' class="es0"'; 2666 } 2667 2668 //Add the style for the escape char ... 2669 $string .= "<span$escape_char_attributes>" . 2670 GeSHi::hsc($this->language_data['ESCAPE_CHAR']); 2671 2672 //Get the byte AFTER the ESCAPE_CHAR we just found 2673 $es_char = $part[$es_pos + 1]; 2674 if ($es_char == "\n") { 2675 // don't put a newline around newlines 2676 $string .= "</span>\n"; 2677 $start = $es_pos + 2; 2678 } elseif (ord($es_char) >= 128) { 2679 //This is an non-ASCII char (UTF8 or single byte) 2680 //This code tries to work around SF#2037598 ... 2681 if(function_exists('mb_substr')) { 2682 $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding); 2683 $string .= $es_char_m . '</span>'; 2684 } elseif (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) { 2685 if(preg_match("/[\xC2-\xDF][\x80-\xBF]". 2686 "|\xE0[\xA0-\xBF][\x80-\xBF]". 2687 "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}". 2688 "|\xED[\x80-\x9F][\x80-\xBF]". 2689 "|\xF0[\x90-\xBF][\x80-\xBF]{2}". 
2690 "|[\xF1-\xF3][\x80-\xBF]{3}". 2691 "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s", 2692 $part, $es_char_m, null, $es_pos + 1)) { 2693 $es_char_m = $es_char_m[0]; 2694 } else { 2695 $es_char_m = $es_char; 2696 } 2697 $string .= $this->hsc($es_char_m) . '</span>'; 2698 } else { 2699 $es_char_m = $this->hsc($es_char); 2700 } 2701 $start = $es_pos + strlen($es_char_m) + 1; 2702 } else { 2703 $string .= $this->hsc($es_char) . '</span>'; 2704 $start = $es_pos + 2; 2705 } 2706 } elseif ($next_escape_regexp_pos < $length && 2707 $next_escape_regexp_pos < $close_pos) { 2708 $es_pos = $next_escape_regexp_pos; 2709 //Add the stuff not in the string yet ... 2710 $string .= $this->hsc(substr($part, $start, $es_pos - $start)); 2711 2712 //Get the key and length of this match ... 2713 $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key]; 2714 $escape_str = substr($part, $es_pos, $escape['length']); 2715 $escape_key = $escape['key']; 2716 2717 //Get the style for this escaped char ... 2718 if (!$this->use_classes) { 2719 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"'; 2720 } else { 2721 $escape_char_attributes = ' class="es' . $escape_key . '"'; 2722 } 2723 2724 //Add the style for the escape char ... 2725 $string .= "<span$escape_char_attributes>" . 2726 $this->hsc($escape_str) . '</span>'; 2727 2728 $start = $es_pos + $escape['length']; 2729 } else { 2730 //Copy the remainder of the string ... 2731 $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>'; 2732 $start = $close_pos + $char_len; 2733 $string_open = false; 2734 } 2735 } while($string_open); 2736 2737 if ($check_linenumbers) { 2738 // Are line numbers used? 
If, we should end the string before 2739 // the newline and begin it again (so when <li>s are put in the source 2740 // remains XHTML compliant) 2741 // note to self: This opens up possibility of config files specifying 2742 // that languages can/cannot have multiline strings??? 2743 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string); 2744 } 2745 2746 $result .= $string; 2747 $string = ''; 2748 $i = $start - 1; 2749 continue; 2750 } elseif ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char && 2751 substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) { 2752 // The start of a hard quoted string 2753 if (!$this->use_classes) { 2754 $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"'; 2755 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"'; 2756 } else { 2757 $string_attributes = ' class="st_h"'; 2758 $escape_char_attributes = ' class="es_h"'; 2759 } 2760 // parse the stuff before this 2761 $result .= $this->parse_non_string_part($stuff_to_parse); 2762 $stuff_to_parse = ''; 2763 2764 // now handle the string 2765 $string = ''; 2766 2767 // look for closing quote 2768 $start = $i + $hq_strlen; 2769 while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) { 2770 $start = $close_pos + 1; 2771 if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR'] && 2772 (($i + $hq_strlen) != ($close_pos))) { //Support empty string for HQ escapes if Starter = Escape 2773 // make sure this quote is not escaped 2774 foreach ($this->language_data['HARDESCAPE'] as $hardescape) { 2775 if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) { 2776 // check wether this quote is escaped or if it is something like '\\' 2777 $escape_char_pos = $close_pos - 1; 2778 while ($escape_char_pos > 0 2779 && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) { 2780 
--$escape_char_pos; 2781 } 2782 if (($close_pos - $escape_char_pos) & 1) { 2783 // uneven number of escape chars => this quote is escaped 2784 continue 2; 2785 } 2786 } 2787 } 2788 } 2789 2790 // found closing quote 2791 break; 2792 } 2793 2794 //Found the closing delimiter? 2795 if (!$close_pos) { 2796 // span till the end of this $part when no closing delimiter is found 2797 $close_pos = $length; 2798 } 2799 2800 //Get the actual string 2801 $string = substr($part, $i, $close_pos - $i + 1); 2802 $i = $close_pos; 2803 2804 // handle escape chars and encode html chars 2805 // (special because when we have escape chars within our string they may not be escaped) 2806 if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) { 2807 $start = 0; 2808 $new_string = ''; 2809 while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) { 2810 // hmtl escape stuff before 2811 $new_string .= $this->hsc(substr($string, $start, $es_pos - $start)); 2812 // check if this is a hard escape 2813 foreach ($this->language_data['HARDESCAPE'] as $hardescape) { 2814 if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) { 2815 // indeed, this is a hardescape 2816 $new_string .= "<span$escape_char_attributes>" . 2817 $this->hsc($hardescape) . '</span>'; 2818 $start = $es_pos + strlen($hardescape); 2819 continue 2; 2820 } 2821 } 2822 // not a hard escape, but a normal escape 2823 // they come in pairs of two 2824 $c = 0; 2825 while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1]) 2826 && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR'] 2827 && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) { 2828 $c += 2; 2829 } 2830 if ($c) { 2831 $new_string .= "<span$escape_char_attributes>" . 2832 str_repeat($escaped_escape_char, $c) . 2833 '</span>'; 2834 $start = $es_pos + $c; 2835 } else { 2836 // this is just a single lonely escape char... 
2837 $new_string .= $escaped_escape_char; 2838 $start = $es_pos + 1; 2839 } 2840 } 2841 $string = $new_string . $this->hsc(substr($string, $start)); 2842 } else { 2843 $string = $this->hsc($string); 2844 } 2845 2846 if ($check_linenumbers) { 2847 // Are line numbers used? If, we should end the string before 2848 // the newline and begin it again (so when <li>s are put in the source 2849 // remains XHTML compliant) 2850 // note to self: This opens up possibility of config files specifying 2851 // that languages can/cannot have multiline strings??? 2852 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string); 2853 } 2854 2855 $result .= "<span$string_attributes>" . $string . '</span>'; 2856 $string = ''; 2857 continue; 2858 } else { 2859 //Have a look for regexp comments 2860 if ($i == $next_comment_regexp_pos) { 2861 $COMMENT_MATCHED = true; 2862 $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key]; 2863 $test_str = $this->hsc(substr($part, $i, $comment['length'])); 2864 2865 //@todo If remove important do remove here 2866 if ($this->lexic_permissions['COMMENTS']['MULTI']) { 2867 if (!$this->use_classes) { 2868 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"'; 2869 } else { 2870 $attributes = ' class="co' . $comment['key'] . '"'; 2871 } 2872 2873 $test_str = "<span$attributes>" . $test_str . 
"</span>"; 2874 2875 // Short-cut through all the multiline code 2876 if ($check_linenumbers) { 2877 // strreplace to put close span and open span around multiline newlines 2878 $test_str = str_replace( 2879 "\n", "</span>\n<span$attributes>", 2880 str_replace("\n ", "\n&nbsp;", $test_str) 2881 ); 2882 } 2883 } 2884 2885 $i += $comment['length'] - 1; 2886 2887 // parse the rest 2888 $result .= $this->parse_non_string_part($stuff_to_parse); 2889 $stuff_to_parse = ''; 2890 } 2891 2892 // If we haven't matched a regexp comment, try multi-line comments 2893 if (!$COMMENT_MATCHED) { 2894 // Is this a multiline comment? 2895 if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) { 2896 $next_comment_multi_pos = $length; 2897 foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) { 2898 $match_i = false; 2899 if (isset($comment_multi_cache_per_key[$open]) && 2900 ($comment_multi_cache_per_key[$open] >= $i || 2901 $comment_multi_cache_per_key[$open] === false)) { 2902 // we have already matched something 2903 if ($comment_multi_cache_per_key[$open] === false) { 2904 // this comment is never matched 2905 continue; 2906 } 2907 $match_i = $comment_multi_cache_per_key[$open]; 2908 } elseif (($match_i = stripos($part, $open, $i)) !== false) { 2909 $comment_multi_cache_per_key[$open] = $match_i; 2910 } else { 2911 $comment_multi_cache_per_key[$open] = false; 2912 continue; 2913 } 2914 if ($match_i !== false && $match_i < $next_comment_multi_pos) { 2915 $next_comment_multi_pos = $match_i; 2916 $next_open_comment_multi = $open; 2917 if ($match_i === $i) { 2918 break; 2919 } 2920 } 2921 } 2922 } 2923 if ($i == $next_comment_multi_pos) { 2924 $open = $next_open_comment_multi; 2925 $close = $this->language_data['COMMENT_MULTI'][$open]; 2926 $open_strlen = strlen($open); 2927 $close_strlen = strlen($close); 2928 $COMMENT_MATCHED = true; 2929 $test_str_match = $open; 2930 //@todo If remove important do remove here 2931 if 
($this->lexic_permissions['COMMENTS']['MULTI'] || 2932 $open == GESHI_START_IMPORTANT) { 2933 if ($open != GESHI_START_IMPORTANT) { 2934 if (!$this->use_classes) { 2935 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"'; 2936 } else { 2937 $attributes = ' class="coMULTI"'; 2938 } 2939 $test_str = "<span$attributes>" . $this->hsc($open); 2940 } else { 2941 if (!$this->use_classes) { 2942 $attributes = ' style="' . $this->important_styles . '"'; 2943 } else { 2944 $attributes = ' class="imp"'; 2945 } 2946 2947 // We don't include the start of the comment if it's an 2948 // "important" part 2949 $test_str = "<span$attributes>"; 2950 } 2951 } else { 2952 $test_str = $this->hsc($open); 2953 } 2954 2955 $close_pos = strpos( $part, $close, $i + $open_strlen ); 2956 2957 if ($close_pos === false) { 2958 $close_pos = $length; 2959 } 2960 2961 // Short-cut through all the multiline code 2962 $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen)); 2963 if (($this->lexic_permissions['COMMENTS']['MULTI'] || 2964 $test_str_match == GESHI_START_IMPORTANT) && 2965 $check_linenumbers) { 2966 2967 // strreplace to put close span and open span around multiline newlines 2968 $test_str .= str_replace( 2969 "\n", "</span>\n<span$attributes>", 2970 str_replace("\n ", "\n&nbsp;", $rest_of_comment) 2971 ); 2972 } else { 2973 $test_str .= $rest_of_comment; 2974 } 2975 2976 if ($this->lexic_permissions['COMMENTS']['MULTI'] || 2977 $test_str_match == GESHI_START_IMPORTANT) { 2978 $test_str .= '</span>'; 2979 } 2980 2981 $i = $close_pos + $close_strlen - 1; 2982 2983 // parse the rest 2984 $result .= $this->parse_non_string_part($stuff_to_parse); 2985 $stuff_to_parse = ''; 2986 } 2987 } 2988 2989 // If we haven't matched a multiline comment, try single-line comments 2990 if (!$COMMENT_MATCHED) { 2991 // cache potential single line comment occurances 2992 if (!empty($this->language_data['COMMENT_SINGLE']) 
&& $next_comment_single_pos < $i) { 2993 $next_comment_single_pos = $length; 2994 foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) { 2995 $match_i = false; 2996 if (isset($comment_single_cache_per_key[$comment_key]) && 2997 ($comment_single_cache_per_key[$comment_key] >= $i || 2998 $comment_single_cache_per_key[$comment_key] === false)) { 2999 // we have already matched something 3000 if ($comment_single_cache_per_key[$comment_key] === false) { 3001 // this comment is never matched 3002 continue; 3003 } 3004 $match_i = $comment_single_cache_per_key[$comment_key]; 3005 } elseif ( 3006 // case sensitive comments 3007 ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] && 3008 ($match_i = stripos($part, $comment_mark, $i)) !== false) || 3009 // non case sensitive 3010 (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] && 3011 (($match_i = strpos($part, $comment_mark, $i)) !== false))) { 3012 $comment_single_cache_per_key[$comment_key] = $match_i; 3013 } else { 3014 $comment_single_cache_per_key[$comment_key] = false; 3015 continue; 3016 } 3017 if ($match_i !== false && $match_i < $next_comment_single_pos) { 3018 $next_comment_single_pos = $match_i; 3019 $next_comment_single_key = $comment_key; 3020 if ($match_i === $i) { 3021 break; 3022 } 3023 } 3024 } 3025 } 3026 if ($next_comment_single_pos == $i) { 3027 $comment_key = $next_comment_single_key; 3028 $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key]; 3029 $com_len = strlen($comment_mark); 3030 3031 // This check will find special variables like $# in bash 3032 // or compiler directives of Delphi beginning {$ 3033 if ((empty($sc_disallowed_before) || ($i == 0) || 3034 (false === strpos($sc_disallowed_before, $part[$i-1]))) && 3035 (empty($sc_disallowed_after) || ($length <= $i + $com_len) || 3036 (false === strpos($sc_disallowed_after, $part[$i + $com_len])))) 3037 { 3038 // this is a valid comment 3039 $COMMENT_MATCHED = true; 3040 if 
($this->lexic_permissions['COMMENTS'][$comment_key]) { 3041 if (!$this->use_classes) { 3042 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"'; 3043 } else { 3044 $attributes = ' class="co' . $comment_key . '"'; 3045 } 3046 $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark)); 3047 } else { 3048 $test_str = $this->hsc($comment_mark); 3049 } 3050 3051 //Check if this comment is the last in the source 3052 $close_pos = strpos($part, "\n", $i); 3053 $oops = false; 3054 if ($close_pos === false) { 3055 $close_pos = $length; 3056 $oops = true; 3057 } 3058 $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len)); 3059 if ($this->lexic_permissions['COMMENTS'][$comment_key]) { 3060 $test_str .= "</span>"; 3061 } 3062 3063 // Take into account that the comment might be the last in the source 3064 if (!$oops) { 3065 $test_str .= "\n"; 3066 } 3067 3068 $i = $close_pos; 3069 3070 // parse the rest 3071 $result .= $this->parse_non_string_part($stuff_to_parse); 3072 $stuff_to_parse = ''; 3073 } 3074 } 3075 } 3076 } 3077 3078 // Where are we adding this char? 3079 if (!$COMMENT_MATCHED) { 3080 $stuff_to_parse .= $char; 3081 } else { 3082 $result .= $test_str; 3083 unset($test_str); 3084 $COMMENT_MATCHED = false; 3085 } 3086 } 3087 // Parse the last bit 3088 $result .= $this->parse_non_string_part($stuff_to_parse); 3089 $stuff_to_parse = ''; 3090 } else { 3091 $result .= $this->hsc($part); 3092 } 3093 // Close the <span> that surrounds the block 3094 if ($STRICTATTRS != '') { 3095 $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result); 3096 $result .= '</span>'; 3097 } 3098 3099 $endresult .= $result; 3100 unset($part, $parts[$key], $result); 3101 } 3102 3103 //This fix is related to SF#1923020, but has to be applied regardless of 3104 //actually highlighting symbols. 
/** NOTE: memorypeak #3 */
        $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);

        // // Parse the last stuff (redundant?)
        // $result .= $this->parse_non_string_part($stuff_to_parse);

        // Lop off the very first and last spaces
        // $result = substr($result, 1, -1);

        // We're finished: stop timing
        $this->set_time($start_time, microtime());

        $this->finalise($endresult);
        return $endresult;
    }

    /**
     * Swaps out spaces and tabs for HTML indentation. Not needed if
     * the code is in a pre block...
     *
     * Works in place on the already highlighted HTML: tabs are expanded to
     * the next tab stop (counting only visible characters, not markup),
     * a leading space on each line and every second space of a run are
     * turned into &nbsp;, and - when neither line numbers nor the
     * PRE_TABLE header are in use - newlines are replaced by <br /> or by
     * the configured line ending.
     *
     * @param string The source to indent (reference!)
     * @since 1.0.0
     * @access private
     */
    function indent(&$result) {
        /// Replace tabs with the correct number of spaces
        if (false !== strpos($result, "\t")) {
            $lines = explode("\n", $result);
            $result = null;//Save memory while we process the lines individually
            $tab_width = $this->get_real_tab_width();
            // Template the tab replacements are cut from: one &nbsp; (6 bytes)
            // followed by $tab_width plain spaces.
            $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);

            for ($key = 0, $n = count($lines); $key < $n; $key++) {
                $line = $lines[$key];
                if (false === strpos($line, "\t")) {
                    continue;
                }

                // $pos is the visible column, i.e. it ignores tags and counts
                // an HTML entity as a single character.
                $pos = 0;
                $length = strlen($line);
                $lines[$key] = ''; // reduce memory

                $IN_TAG = false;
                for ($i = 0; $i < $length; ++$i) {
                    $char = $line[$i];
                    // Simple engine to work out whether we're in a tag.
                    // If we are we modify $pos. This is so we ignore HTML
                    // in the line and only workout the tab replacement
                    // via the actual content of the string
                    // This test could be improved to include strings in the
                    // html so that < or > would be allowed in user's styles
                    // (e.g. quotes: '<' '>'; or similar)
                    if ($IN_TAG) {
                        if ('>' == $char) {
                            $IN_TAG = false;
                        }
                        $lines[$key] .= $char;
                    } elseif ('<' == $char) {
                        $IN_TAG = true;
                        $lines[$key] .= '<';
                    } elseif ('&' == $char) {
                        // Look for the terminating ';' of an entity. The
                        // characters after the '&' are each going to bump
                        // $pos in the default branch below, so subtract
                        // them up front to make the whole entity count as
                        // one column.
                        $substr = substr($line, $i + 3, 5);
                        $posi = strpos($substr, ';');
                        if (false === $posi) {
                            ++$pos;
                        } else {
                            $pos -= $posi+2;
                        }
                        $lines[$key] .= $char;
                    } elseif ("\t" == $char) {
                        $str = '';
                        // OPTIMISE - move $strs out. Make an array:
                        // $tabs = array(
                        //  1 => '&nbsp;',
                        //  2 => '&nbsp; ',
                        //  3 => '&nbsp; &nbsp;' etc etc
                        // to use instead of building a string every time
                        $tab_end_width = $tab_width - ($pos % $tab_width); //Moved out of the look as it doesn't change within the loop
                        if (($pos & 1) || 1 == $tab_end_width) {
                            // plain spaces only (skip the leading &nbsp;)
                            $str .= substr($tab_string, 6, $tab_end_width);
                        } else {
                            // &nbsp; plus ($tab_end_width - 1) plain spaces
                            $str .= substr($tab_string, 0, $tab_end_width+5);
                        }
                        $lines[$key] .= $str;
                        $pos += $tab_end_width;

                        // No more tabs on this line: copy the rest verbatim.
                        if (false === strpos($line, "\t", $i + 1)) {
                            $lines[$key] .= substr($line, $i + 1);
                            break;
                        }
                    } elseif (0 == $pos && ' ' == $char) {
                        $lines[$key] .= '&nbsp;';
                        ++$pos;
                    } else {
                        $lines[$key] .= $char;
                        ++$pos;
                    }
                }
            }
            $result = implode("\n", $lines);
            unset($lines);//We don't need the lines separated beyond this --- free them!
        }
        // Other whitespace
        // BenBE: Fix to reduce the number of replacements to be done
        $result = preg_replace('/^ /m', '&nbsp;', $result);
        // Only runs of two spaces are rewritten. A single-space needle would
        // also hit the spaces inside tags (e.g. `<span style=...>`) and
        // corrupt the markup, besides doubling every gap in the code.
        $result = str_replace('  ', ' &nbsp;', $result);

        if ($this->line_numbers == GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
            if ($this->line_ending === null) {
                $result = nl2br($result);
            } else {
                $result = str_replace("\n", $this->line_ending, $result);
            }
        }
    }

    /**
     * Changes the case of a keyword for those languages where a change is asked for
     *
     * The mode is read from the language data entry 'CASE_KEYWORDS';
     * any value other than GESHI_CAPS_UPPER / GESHI_CAPS_LOWER leaves the
     * input untouched.
     *
     * @param string The keyword to change the case of
     * @return string The keyword with its case changed
     * @since 1.0.0
     * @access private
     */
    function change_case($instr) {
        switch ($this->language_data['CASE_KEYWORDS']) {
            case GESHI_CAPS_UPPER:
                return strtoupper($instr);
            case GESHI_CAPS_LOWER:
                return strtolower($instr);
            default:
                return $instr;
        }
    }

    /**
     * Handles replacements of keywords to include markup and links if requested
     *
     * Callback for preg_replace_callback(); the keyword group is taken from
     * $this->_kw_replace_group, which the caller sets before matching.
     * The result uses the intermediate markers `<|/GROUP/>`, `|>` and
     * `<|UR1|`, which parse_non_string_part() later turns into real tags.
     *
     * @param array The match array ([0] full match, [1] the keyword)
     * @return The HTML for the match found
     * @since 1.0.8
     * @access private
     *
     * @todo Get rid of ender in keyword links
     */
    function handle_keyword_replace($match) {
        $k = $this->_kw_replace_group;
        $keyword = $match[0];
        $keyword_match = $match[1];

        $before = '';
        $after = '';

        if ($this->keyword_links) {
            // Keyword links have been enabled

            if (isset($this->language_data['URLS'][$k]) &&
                $this->language_data['URLS'][$k] != '') {
                // There is a base group for this keyword

                // Old system: strtolower
                //$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
                // New system: get keyword from language file to get correct case
                if (!$this->language_data['CASE_SENSITIVE'][$k] &&
                    strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
                    // NOTE(review): if no entry compares equal, $word is left
                    // holding the last keyword of the group.
                    foreach ($this->language_data['KEYWORDS'][$k] as $word) {
                        if (strcasecmp($word, $keyword_match) == 0) {
                            break;
                        }
                    }
                } else {
                    $word = $keyword_match;
                }

                // Dots are masked as <DOT> so later passes do not treat the
                // URL as code; they are restored in parse_non_string_part().
                $before = '<|UR1|"' .
                    str_replace(
                        array(
                            '{FNAME}',
                            '{FNAMEL}',
                            '{FNAMEU}',
                            '.'),
                        array(
                            str_replace('+', '%20', urlencode($this->hsc($word))),
                            str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
                            str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
                            '<DOT>'),
                        $this->language_data['URLS'][$k]
                    ) . '">';
                $after = '</a>';
            }
        }

        return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
    }

    /**
     * handles regular expressions highlighting-definitions with callback functions
     *
     * The style for the current regexp key ($this->_rx_key) is itself a
     * callable; it is invoked with the matched text and its return value
     * is used as the content of the style attribute.
     *
     * @note this is a callback, don't use it directly
     *
     * @param array the matches array
     * @return The highlighted string
     * @since 1.0.8
     * @access private
     */
    function handle_regexps_callback($matches) {
        // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
        return ' style="' . call_user_func($this->language_data['STYLES']['REGEXPS'][$this->_rx_key], $matches[1]) . '"'. $matches[1] . '|>';
    }

    /**
     * handles newlines in REGEXPS matches.
* Set the _hmr_* vars before calling this
     *
     * Every line of the replacement gets its own `<|!REG3XPkey!>` ... `|>`
     * marker pair, so a match spanning several lines still yields
     * well-formed markup once the markers are turned into spans.
     *
     * @note this is a callback, don't use it directly
     *
     * @param array the matches array
     * @return string
     * @since 1.0.8
     * @access private
     */
    function handle_multiline_regexps($matches) {
        $before = $this->_hmr_before;
        $after = $this->_hmr_after;
        if ($this->_hmr_replace) {
            $replace = $this->_hmr_replace;

            // Map every back-reference ("\0", "\1", ... "\10", ...) to the
            // text captured for it.
            $pairs = array();
            foreach ($matches as $k => $captured) {
                $pairs['\\' . $k] = $captured;
            }

            // strtr() tries the longest key first and never rescans text it
            // has already inserted. A sequential str_replace() would let
            // "\1" eat the front of "\10" and would expand back-reference
            // look-alikes that occur inside the captured text itself.
            $before = strtr($before, $pairs);
            $after = strtr($after, $pairs);
            $replace = strtr($replace, $pairs);
        } else {
            $replace = $matches[0];
        }
        return $before
                    . '<|!REG3XP' . $this->_hmr_key .'!>'
                        . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace)
                    . '|>'
              . $after;
    }

    /**
     * Takes a string that has no strings or comments in it, and highlights
     * stuff like keywords, numbers and methods.
     *
     * The text is first HTML-escaped and then marked up in several passes
     * with intermediate markers (`<|...>` opens, `|>` closes, `<|UR1|`
     * opens a link, `<DOT>` masks a dot). Only at the very end are the
     * markers converted into real <span>/<a> tags, so that earlier passes
     * never highlight the markup produced by previous ones.
     *
     * @param string The string to parse for keyword, numbers etc.
     * @return string The highlighted HTML fragment
     * @since 1.0.0
     * @access private
     * @todo BUGGY! Why? Why not build string and return?
     */
    function parse_non_string_part($stuff_to_parse) {
        // A sentinel space is prepended here and stripped again on return.
        $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);

        // Highlight keywords
        $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#|^&";
        $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
        if ($this->lexic_permissions['STRINGS']) {
            $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
            $disallowed_before .= $quotemarks;
            $disallowed_after .= $quotemarks;
        }
        $disallowed_before .= "])";
        $disallowed_after .= "])";

        $parser_control_pergroup = false;
        if (isset($this->language_data['PARSER_CONTROL'])) {
            if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
                $x = 0; // check wether per-keyword-group parser_control is enabled
                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
                    $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
                    ++$x;
                }
                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
                    $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
                    ++$x;
                }
                // Any entry besides the two global ones is a per-group override.
                $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
            }
        }

        foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
            if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
                $this->lexic_permissions['KEYWORDS'][$k]) {

                $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
                $modifiers = $case_sensitive ? '' : 'i';

                // NEW in 1.0.8 - per-keyword-group parser control
                $disallowed_before_local = $disallowed_before;
                $disallowed_after_local = $disallowed_after;
                if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
                    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
                        $disallowed_before_local =
                            $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
                    }

                    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
                        $disallowed_after_local =
                            $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
                    }
                }

                // Tells handle_keyword_replace() which group it is working on.
                $this->_kw_replace_group = $k;

                //NEW in 1.0.8, the cached regexp list
                // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
                for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set < $set_length; ++$set) {
                    $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
                    // Might make a more unique string for putting the number in soon
                    // Basically, we don't put the styles in yet because then the styles themselves will
                    // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
                    $stuff_to_parse = preg_replace_callback(
                        "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php|aspx?))$disallowed_after_local/$modifiers",
                        array($this, 'handle_keyword_replace'),
                        $stuff_to_parse
                        );
                }
            }
        }

        // Regular expressions
        foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
            if ($this->lexic_permissions['REGEXPS'][$key]) {
                if (is_array($regexp)) {
                    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                        // produce valid HTML when we match multiple lines
                        $this->_hmr_replace = $regexp[GESHI_REPLACE];
                        $this->_hmr_before = $regexp[GESHI_BEFORE];
                        $this->_hmr_key = $key;
                        $this->_hmr_after = $regexp[GESHI_AFTER];
                        $stuff_to_parse = preg_replace_callback(
                            "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
                            array($this, 'handle_multiline_regexps'),
                            $stuff_to_parse);
                        $this->_hmr_replace = false;
                        $this->_hmr_before = '';
                        $this->_hmr_after = '';
                    } else {
                        $stuff_to_parse = preg_replace(
                            '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
                            $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
                            $stuff_to_parse);
                    }
                } else {
                    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                        // produce valid HTML when we match multiple lines
                        $this->_hmr_key = $key;
                        $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
                                              array($this, 'handle_multiline_regexps'), $stuff_to_parse);
                        $this->_hmr_key = '';
                    } else {
                        $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
                    }
                }
            }
        }

        // Highlight numbers. As of 1.0.8 we support different types of numbers
        $numbers_found = false;

        if ($this->lexic_permissions['NUMBERS'] && preg_match($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'], $stuff_to_parse )) {
            $numbers_found = true;

            //For each of the formats ...
            foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
                //Check if it should be highlighted ...
                $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
            }
        }

        //
        // Now that's all done, replace /[number]/ with the correct styles
        //
        foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
            if (!$this->use_classes) {
                $attributes = ' style="' .
                    (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
                    $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
            } else {
                $attributes = ' class="kw' . $k . '"';
            }
            $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
        }

        if ($numbers_found) {
            // Put number styles in
            foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
                //Commented out for now, as this needs some review ...
                //                if ($numbers_permissions & $id) {
                //Get the appropriate style ...
                //Checking for unset styles is done by the style cache builder ...
                if (!$this->use_classes) {
                    $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
                } else {
                    $attributes = ' class="nu'.$id.'"';
                }

                //Set in the correct styles ...
                $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
                //                }
            }
        }

        // Highlight methods and fields in objects
        if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
            $oolang_spaces = "[\s]*";
            $oolang_before = "";
            $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
            if (isset($this->language_data['PARSER_CONTROL'])) {
                if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
                        $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
                    }
                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
                        $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
                    }
                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
                        $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
                    }
                }
            }

            foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
                // Cheap substring test first; only run the regexp if the
                // splitter occurs at all.
                if (false !== strpos($stuff_to_parse, $splitter)) {
                    if (!$this->use_classes) {
                        $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
                    } else {
                        $attributes = ' class="me' . $key . '"';
                    }
                    $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
                }
            }
        }

        //
        // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
        // You try it, and see what happens ;)
        // TODO: Fix lexic permissions not converting entities if shouldn't
        // be highlighting regardless
        //
        if ($this->lexic_permissions['BRACKETS']) {
            $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
                              $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
        }


        //FIX for symbol highlighting ...
        if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
            //Get all matches and throw away those witin a block that is already highlighted... (i.e. matched by a regexp)
            $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
            // Running difference in length between the rewritten string and
            // the string the offsets in $pot_symbols refer to.
            $global_offset = 0;
            for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
                $symbol_match = $pot_symbols[$s_id][0][0];
                if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
                    // already highlighted blocks _must_ include either < or >
                    // so if this conditional applies, we have to skip this match
                    // BenBE: UNLESS the block contains <SEMI> or <PIPE>
                    if(strpos($symbol_match, '<SEMI>') === false &&
                        strpos($symbol_match, '<PIPE>') === false) {
                        continue;
                    }
                }

                // if we reach this point, we have a valid match which needs to be highlighted

                $symbol_length = strlen($symbol_match);
                $symbol_offset = $pot_symbols[$s_id][0][1];
                unset($pot_symbols[$s_id]);
                $symbol_end = $symbol_length + $symbol_offset;
                $symbol_hl = "";

                // if we have multiple styles, we have to handle them properly
                if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
                    $old_sym = -1;
                    // Split the current stuff to replace into its atomic symbols ...
                    preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
                    foreach ($sym_match_syms[0] as $sym_ms) {
                        //Check if consequtive symbols belong to the same group to save output ...
                        if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
                            && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
                            if (-1 != $old_sym) {
                                $symbol_hl .= "|>";
                            }
                            $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
                            if (!$this->use_classes) {
                                $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
                            } else {
                                $symbol_hl .= '<| class="sy' . $old_sym . '">';
                            }
                        }
                        $symbol_hl .= $sym_ms;
                    }
                    unset($sym_match_syms);

                    //Close remaining tags and insert the replacement at the right position ...
                    //Take caution if symbol_hl is empty to avoid doubled closing spans.
                    if (-1 != $old_sym) {
                        $symbol_hl .= "|>";
                    }
                } else {
                    if (!$this->use_classes) {
                        $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
                    } else {
                        $symbol_hl = '<| class="sy0">';
                    }
                    $symbol_hl .= $symbol_match . '|>';
                }

                $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);

                // since we replace old text with something of different size,
                // we'll have to keep track of the differences
                $global_offset += strlen($symbol_hl) - $symbol_length;
            }
        }
        //FIX for symbol highlighting ...

        // Add class/style for regexps
        foreach (array_keys($this->language_data['REGEXPS']) as $key) {
            if ($this->lexic_permissions['REGEXPS'][$key]) {
                if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
                    // The style is computed per match by a user callback.
                    $this->_rx_key = $key;
                    $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
                        array($this, 'handle_regexps_callback'),
                        $stuff_to_parse);
                } else {
                    if (!$this->use_classes) {
                        $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
                    } else {
                        if (is_array($this->language_data['REGEXPS'][$key]) &&
                            array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
                            $attributes = ' class="' .
                                $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
                        } else {
                           $attributes = ' class="re' . $key . '"';
                        }
                    }
                    $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
                }
            }
        }

        // Replace <DOT> with . for urls
        $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
        // Replace <|UR1| with <a href= for urls also
        if (isset($this->link_styles[GESHI_LINK])) {
            if ($this->use_classes) {
                $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
            } else {
                $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
            }
        } else {
            $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
        }

        //
        // NOW we add the span thingy ;)
        //

        $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
        $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
        // Drop the sentinel space added at the top.
        return substr($stuff_to_parse, 1);
    }

    /**
     * Sets the time taken to parse the code
     *
     * Both arguments are expected in the string form returned by
     * microtime() without arguments, i.e. "msec sec".
     *
     * @param microtime The time when parsing started
     * @param microtime The time when parsing ended
     * @since 1.0.2
     * @access private
     */
    function set_time($start_time, $end_time) {
        $start = explode(' ', $start_time);
        $end = explode(' ', $end_time);
        // Subtract the seconds and the fractional parts separately: adding a
        // fraction to a ten-digit timestamp first would round away the
        // sub-microsecond digits before the subtraction happens.
        $this->time = ($end[1] - $start[1]) + ($end[0] - $start[0]);
    }

    /**
     * Gets the time taken to parse the code
     *
     * @return double The time taken to parse the code
     * @since 1.0.2
     */
    function get_time() {
        return $this->time;
    }

    /**
     * Merges arrays recursively, overwriting values of the first array with values of later arrays
     *
     * Accepts any number of arrays. For a key present in both the result so
     * far and a later array, array values are merged recursively and all
     * other values are overwritten. If any argument is not an array an
     * E_USER_WARNING is raised and false is returned.
     *
     * @return array|false The merged array, or false on invalid input
     * @since 1.0.8
     * @access private
     */
    function merge_arrays() {
        $arrays = func_get_args();
        $narrays = count($arrays);

        // check arguments
        // comment out if more performance is necessary (in this case the foreach loop will trigger a warning if the argument is not an array)
        for ($i = 0; $i < $narrays; $i ++) {
            if (!is_array($arrays[$i])) {
                // also array_merge_recursive returns nothing in this case
                trigger_error('Argument #' . ($i+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
                return false;
            }
        }

        // the first array is in the output set in every case
        $ret = $arrays[0];

        // merge $ret with the remaining arrays
        for ($i = 1; $i < $narrays; $i ++) {
            foreach ($arrays[$i] as $key => $value) {
                if (is_array($value) && isset($ret[$key])) {
                    // if $ret[$key] is not an array you try to merge an scalar value with an array - the result is not defined (incompatible arrays)
                    // in this case the call will trigger an E_USER_WARNING and the $ret[$key] will be false.
                    $ret[$key] = $this->merge_arrays($ret[$key], $value);
                } else {
                    $ret[$key] = $value;
                }
            }
        }

        return $ret;
    }

    /**
     * Gets language information and stores it for later use
     *
     * Requires the language file (which is expected to define
     * $language_data), resets the lexic permissions to "highlight
     * everything", applies PARSER_CONTROL/ENABLE_FLAGS, and finally merges
     * an optional sibling "<name>.style.php" file (expected to define
     * $style_data) over the language's STYLES. Does nothing if the same
     * file is already loaded.
     *
     * @param string The filename of the language file you want to load
     * @since 1.0.0
     * @access private
     * @todo Needs to load keys for lexic permissions for keywords, regexps etc
     */
    function load_language($file_name) {
        if ($file_name == $this->loaded_language) {
            // this file is already loaded!
            return;
        }

        //Prepare some stuff before actually loading the language file
        $this->loaded_language = $file_name;
        $this->parse_cache_built = false;
        $this->enable_highlighting();
        $language_data = array();

        //Load the language file
        require $file_name;

        // Perhaps some checking might be added here later to check that
        // $language data is a valid thing but maybe not
        $this->language_data = $language_data;

        // Set strict mode if should be set
        $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];

        // Set permissions for all lexics to true
        // so they'll be highlighted by default
        // (keyword groups without any keywords are switched off)
        foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
            if (!empty($this->language_data['KEYWORDS'][$key])) {
                $this->lexic_permissions['KEYWORDS'][$key] = true;
            } else {
                $this->lexic_permissions['KEYWORDS'][$key] = false;
            }
        }

        foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) {
            $this->lexic_permissions['COMMENTS'][$key] = true;
        }
        foreach (array_keys($this->language_data['REGEXPS']) as $key) {
            $this->lexic_permissions['REGEXPS'][$key] = true;
        }

        // for BenBE and future code reviews:
        // we can use empty here since we only check for existance and emptiness of an array
        // if it is not an array at all but rather false or null this will work as intended as well
        // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice
        if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
            foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
                // it's either true or false and maybe is true as well
                $perm = $value !== GESHI_NEVER;
                if ($flag == 'ALL') {
                    $this->enable_highlighting($perm);
                    continue;
                }
                if (!isset($this->lexic_permissions[$flag])) {
                    // unknown lexic permission
                    continue;
                }
                if (is_array($this->lexic_permissions[$flag])) {
                    foreach ($this->lexic_permissions[$flag] as $key => $val) {
                        $this->lexic_permissions[$flag][$key] = $perm;
                    }
                } else {
                    $this->lexic_permissions[$flag] = $perm;
                }
            }
            unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
        }

        //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
        //You need to set one for HARDESCAPES only in this case.
        if(!isset($this->language_data['HARDCHAR'])) {
            $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
        }

        //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
        // ("foo.php" -> "foo.style.php": the last four characters are cut off)
        $style_filename = substr($file_name, 0, -4) . '.style.php';
        if (is_readable($style_filename)) {
            //Clear any style_data that could have been set before ...
            if (isset($style_data)) {
                unset($style_data);
            }

            //Read the Style Information from the style file
            include $style_filename;

            //Apply the new styles to our current language styles
            if (isset($style_data) && is_array($style_data)) {
                $this->language_data['STYLES'] =
                    $this->merge_arrays($this->language_data['STYLES'], $style_data);
            }
        }
    }

    /**
     * Takes the parsed code and various options, and creates the HTML
     * surrounding it to make it look nice.
     *
     * @param string The code already parsed (reference!)
     * @since 1.0.0
     * @access private
     */
    function finalise(&$parsed_code) {
        // Remove end parts of important declarations
        // This is BUGGY!!
My fault for bad code: fix coming in 1.2 3845 // @todo Remove this crap 3846 if ($this->enable_important_blocks && 3847 (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) { 3848 $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code); 3849 } 3850 3851 // Add HTML whitespace stuff if we're using the <div> header 3852 if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) { 3853 $this->indent($parsed_code); 3854 } 3855 3856 // purge some unnecessary stuff 3857 /** NOTE: memorypeak #1 */ 3858 $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code); 3859 3860 // If we are using IDs for line numbers, there needs to be an overall 3861 // ID set to prevent collisions. 3862 if ($this->add_ids && !$this->overall_id) { 3863 $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4); 3864 } 3865 3866 // Get code into lines 3867 /** NOTE: memorypeak #2 */ 3868 $code = explode("\n", $parsed_code); 3869 $parsed_code = $this->header(); 3870 3871 // If we're using line numbers, we insert <li>s and appropriate 3872 // markup to style them (otherwise we don't need to do anything) 3873 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) { 3874 // If we're using the <pre> header, we shouldn't add newlines because 3875 // the <pre> will line-break them (and the <li>s already do this for us) 3876 $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : ''; 3877 3878 // Set vars to defaults for following loop 3879 $i = 0; 3880 3881 // Foreach line... 3882 for ($i = 0, $n = count($code); $i < $n;) { 3883 //Reset the attributes for a new line ... 
3884 $attrs = array(); 3885 3886 // Make lines have at least one space in them if they're empty 3887 // BenBE: Checking emptiness using trim instead of relying on blanks 3888 if ('' == trim($code[$i])) { 3889 $code[$i] = '&nbsp;'; 3890 } 3891 3892 // If this is a "special line"... 3893 if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS && 3894 $i % $this->line_nth_row == ($this->line_nth_row - 1)) { 3895 // Set the attributes to style the line 3896 if ($this->use_classes) { 3897 //$attr = ' class="li2"'; 3898 $attrs['class'][] = 'li2'; 3899 $def_attr = ' class="de2"'; 3900 } else { 3901 //$attr = ' style="' . $this->line_style2 . '"'; 3902 $attrs['style'][] = $this->line_style2; 3903 // This style "covers up" the special styles set for special lines 3904 // so that styles applied to special lines don't apply to the actual 3905 // code on that line 3906 $def_attr = ' style="' . $this->code_style . '"'; 3907 } 3908 } else { 3909 if ($this->use_classes) { 3910 //$attr = ' class="li1"'; 3911 $attrs['class'][] = 'li1'; 3912 $def_attr = ' class="de1"'; 3913 } else { 3914 //$attr = ' style="' . $this->line_style1 . '"'; 3915 $attrs['style'][] = $this->line_style1; 3916 $def_attr = ' style="' . $this->code_style . '"'; 3917 } 3918 } 3919 3920 //Check which type of tag to insert for this line 3921 if ($this->header_type == GESHI_HEADER_PRE_VALID) { 3922 $start = "<pre$def_attr>"; 3923 $end = '</pre>'; 3924 } else { 3925 // Span or div? 3926 $start = "<div$def_attr>"; 3927 $end = '</div>'; 3928 } 3929 3930 ++$i; 3931 3932 // Are we supposed to use ids? If so, add them 3933 if ($this->add_ids) { 3934 $attrs['id'][] = "$this->overall_id-$i"; 3935 } 3936 3937 //Is this some line with extra styles??? 
3938 if (in_array($i, $this->highlight_extra_lines)) { 3939 if ($this->use_classes) { 3940 if (isset($this->highlight_extra_lines_styles[$i])) { 3941 $attrs['class'][] = "lx$i"; 3942 } else { 3943 $attrs['class'][] = "ln-xtra"; 3944 } 3945 } else { 3946 array_push($attrs['style'], $this->get_line_style($i)); 3947 } 3948 } 3949 3950 // Add in the line surrounded by appropriate list HTML 3951 $attr_string = ''; 3952 foreach ($attrs as $key => $attr) { 3953 $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"'; 3954 } 3955 3956 $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls"; 3957 unset($code[$i - 1]); 3958 } 3959 } else { 3960 $n = count($code); 3961 if ($this->use_classes) { 3962 $attributes = ' class="de1"'; 3963 } else { 3964 $attributes = ' style="'. $this->code_style .'"'; 3965 } 3966 if ($this->header_type == GESHI_HEADER_PRE_VALID) { 3967 $parsed_code .= '<pre'. $attributes .'>'; 3968 } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) { 3969 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) { 3970 if ($this->use_classes) { 3971 $attrs = ' class="ln"'; 3972 } else { 3973 $attrs = ' style="'. $this->table_linenumber_style .'"'; 3974 } 3975 $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>'; 3976 // get linenumbers 3977 // we don't merge it with the for below, since it should be better for 3978 // memory consumption this way 3979 // @todo: but... 
actually it would still be somewhat nice to merge the two loops 3980 // the mem peaks are at different positions 3981 for ($i = 0; $i < $n; ++$i) { 3982 $close = 0; 3983 // fancy lines 3984 if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS && 3985 $i % $this->line_nth_row == ($this->line_nth_row - 1)) { 3986 // Set the attributes to style the line 3987 if ($this->use_classes) { 3988 $parsed_code .= '<span class="xtra li2"><span class="de2">'; 3989 } else { 3990 // This style "covers up" the special styles set for special lines 3991 // so that styles applied to special lines don't apply to the actual 3992 // code on that line 3993 $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">' 3994 .'<span style="' . $this->code_style .'">'; 3995 } 3996 $close += 2; 3997 } 3998 //Is this some line with extra styles??? 3999 if (in_array($i + 1, $this->highlight_extra_lines)) { 4000 if ($this->use_classes) { 4001 if (isset($this->highlight_extra_lines_styles[$i])) { 4002 $parsed_code .= "<span class=\"xtra lx$i\">"; 4003 } else { 4004 $parsed_code .= "<span class=\"xtra ln-xtra\">"; 4005 } 4006 } else { 4007 $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">"; 4008 } 4009 ++$close; 4010 } 4011 $parsed_code .= $this->line_numbers_start + $i; 4012 if ($close) { 4013 $parsed_code .= str_repeat('</span>', $close); 4014 } elseif ($i != $n) { 4015 $parsed_code .= "\n"; 4016 } 4017 } 4018 $parsed_code .= '</pre></td><td'.$attributes.'>'; 4019 } 4020 $parsed_code .= '<pre'. $attributes .'>'; 4021 } 4022 // No line numbers, but still need to handle highlighting lines extra. 
4023 // Have to use divs so the full width of the code is highlighted 4024 $close = 0; 4025 for ($i = 0; $i < $n; ++$i) { 4026 // Make lines have at least one space in them if they're empty 4027 // BenBE: Checking emptiness using trim instead of relying on blanks 4028 if ('' == trim($code[$i])) { 4029 $code[$i] = '&nbsp;'; 4030 } 4031 // fancy lines 4032 if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS && 4033 $i % $this->line_nth_row == ($this->line_nth_row - 1)) { 4034 // Set the attributes to style the line 4035 if ($this->use_classes) { 4036 $parsed_code .= '<span class="xtra li2"><span class="de2">'; 4037 } else { 4038 // This style "covers up" the special styles set for special lines 4039 // so that styles applied to special lines don't apply to the actual 4040 // code on that line 4041 $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">' 4042 .'<span style="' . $this->code_style .'">'; 4043 } 4044 $close += 2; 4045 } 4046 //Is this some line with extra styles??? 4047 if (in_array($i + 1, $this->highlight_extra_lines)) { 4048 if ($this->use_classes) { 4049 if (isset($this->highlight_extra_lines_styles[$i])) { 4050 $parsed_code .= "<span class=\"xtra lx$i\">"; 4051 } else { 4052 $parsed_code .= "<span class=\"xtra ln-xtra\">"; 4053 } 4054 } else { 4055 $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . 
"\">"; 4056 } 4057 ++$close; 4058 } 4059 4060 $parsed_code .= $code[$i]; 4061 4062 if ($close) { 4063 $parsed_code .= str_repeat('</span>', $close); 4064 $close = 0; 4065 } 4066 elseif ($i + 1 < $n) { 4067 $parsed_code .= "\n"; 4068 } 4069 unset($code[$i]); 4070 } 4071 4072 if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) { 4073 $parsed_code .= '</pre>'; 4074 } 4075 if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) { 4076 $parsed_code .= '</td>'; 4077 } 4078 } 4079 4080 $parsed_code .= $this->footer(); 4081 } 4082 4083 /** 4084 * Creates the header for the code block (with correct attributes) 4085 * 4086 * @return string The header for the code block 4087 * @since 1.0.0 4088 * @access private 4089 */ 4090 function header() { 4091 // Get attributes needed 4092 /** 4093 * @todo Document behaviour change - class is outputted regardless of whether 4094 * we're using classes or not. Same with style 4095 */ 4096 $attributes = ' class="' . $this->_genCSSName($this->language); 4097 if ($this->overall_class != '') { 4098 $attributes .= " ".$this->_genCSSName($this->overall_class); 4099 } 4100 $attributes .= '"'; 4101 4102 if ($this->overall_id != '') { 4103 $attributes .= " id=\"{$this->overall_id}\""; 4104 } 4105 if ($this->overall_style != '' && !$this->use_classes) { 4106 $attributes .= ' style="' . $this->overall_style . '"'; 4107 } 4108 4109 $ol_attributes = ''; 4110 4111 if ($this->line_numbers_start != 1) { 4112 $ol_attributes .= ' start="' . $this->line_numbers_start . 
'"'; 4113 } 4114 4115 // Get the header HTML 4116 $header = $this->header_content; 4117 if ($header) { 4118 if ($this->header_type == GESHI_HEADER_PRE || $this->header_type == GESHI_HEADER_PRE_VALID) { 4119 $header = str_replace("\n", '', $header); 4120 } 4121 $header = $this->replace_keywords($header); 4122 4123 if ($this->use_classes) { 4124 $attr = ' class="head"'; 4125 } else { 4126 $attr = " style=\"{$this->header_content_style}\""; 4127 } 4128 if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) { 4129 $header = "<thead><tr><td colspan=\"2\" $attr>$header</td></tr></thead>"; 4130 } else { 4131 $header = "<div$attr>$header</div>"; 4132 } 4133 } 4134 4135 if (GESHI_HEADER_NONE == $this->header_type) { 4136 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) { 4137 return "$header<ol$attributes$ol_attributes>"; 4138 } 4139 return $header . ($this->force_code_block ? '<div>' : ''); 4140 } 4141 4142 // Work out what to return and do it 4143 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) { 4144 if ($this->header_type == GESHI_HEADER_PRE) { 4145 return "<pre$attributes>$header<ol$ol_attributes>"; 4146 } elseif ($this->header_type == GESHI_HEADER_DIV || 4147 $this->header_type == GESHI_HEADER_PRE_VALID) { 4148 return "<div$attributes>$header<ol$ol_attributes>"; 4149 } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) { 4150 return "<table$attributes>$header<tbody><tr class=\"li1\">"; 4151 } 4152 } else { 4153 if ($this->header_type == GESHI_HEADER_PRE) { 4154 return "<pre$attributes>$header" . 4155 ($this->force_code_block ? '<div>' : ''); 4156 } else { 4157 return "<div$attributes>$header" . 4158 ($this->force_code_block ? '<div>' : ''); 4159 } 4160 } 4161 } 4162 4163 /** 4164 * Returns the footer for the code block. 
*
 * @return string The footer for the code block
 * @since 1.0.0
 * @access private
 */
function footer() {
    $footer = $this->footer_content;
    if ($footer) {
        // With the plain <pre> header the footer ends up inside the <pre>,
        // where literal newlines would be rendered - strip them.
        if ($this->header_type == GESHI_HEADER_PRE) {
            $footer = str_replace("\n", '', $footer);
        }
        $footer = $this->replace_keywords($footer);

        // Either reference the stylesheet class or inline the footer style
        if ($this->use_classes) {
            $attr = ' class="foot"';
        } else {
            $attr = " style=\"{$this->footer_content_style}\"";
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            // Table layout: the footer spans the line number and the code column.
            // The class/style attribute has to be applied here as well, exactly
            // like header() does for its <thead> cell, otherwise the footer
            // styling is lost in table mode.
            $footer = "<tfoot><tr><td colspan=\"2\"$attr>$footer</td></tr></tfoot>";
        } else {
            $footer = "<div$attr>$footer</div>";
        }
    }

    // No container at all: only a possibly opened <ol> has to be closed
    if (GESHI_HEADER_NONE == $this->header_type) {
        return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
    }

    if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</div>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    }
    elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</tr></tbody>$footer</table>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    }
    else {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</pre>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</pre>";
    }
}

/**
 * Replaces certain keywords in the header and footer with
 * certain configuration values.
 *
 * Both the <KEYWORD> and the {KEYWORD} spelling are replaced for
 * TIME, LANGUAGE, VERSION and SPEED.
 *
 * @param string The header or footer content to do replacement on
 * @return string The header or footer with replaced keywords
 * @since 1.0.2
 * @access private
 */
function replace_keywords($instr) {
    $time = $this->get_time();

    // Throughput can only be computed for a positive run time
    if ($time <= 0) {
        $speed = 'N/A';
    } else {
        $speed = strlen($this->source) / $time;
        if ($speed >= 1024) {
            $speed = sprintf("%.2f KB/s", $speed / 1024.0);
        } else {
            $speed = sprintf("%.0f B/s", $speed);
        }
    }

    $formatted_time = number_format($time, 3);
    $language_name = $this->language_data['LANG_NAME'];

    // Keep both arrays in the same order: str_replace() pairs them by position
    $keywords = array(
        '<TIME>', '{TIME}',
        '<LANGUAGE>', '{LANGUAGE}',
        '<VERSION>', '{VERSION}',
        '<SPEED>', '{SPEED}'
    );
    $replacements = array(
        $formatted_time, $formatted_time,
        $language_name, $language_name,
        GESHI_VERSION, GESHI_VERSION,
        $speed, $speed
    );

    return str_replace($keywords, $replacements, $instr);
}

/**
 * Secure replacement for PHP built-in function htmlspecialchars().
 *
 * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
 * for this replacement function.
 *
 * The INTERFACE for this function is almost the same as that for
 * htmlspecialchars(), with the same default for quote style; however, there
 * is no 'charset' parameter. The reason for this is as follows:
 *
 * The PHP docs say:
 * "The third argument charset defines character set used in conversion."
*
 * I suspect PHP's htmlspecialchars() is working at the byte-value level and
 * thus _needs_ to know (or assume) a character set because the special
 * characters to be replaced could exist at different code points in
 * different character sets. (If indeed htmlspecialchars() works at
 * byte-value level that goes some way towards explaining why the
 * vulnerability would exist in this function, too, and not only in
 * htmlentities() which certainly is working at byte-value level.)
 *
 * This replacement function however works at character level and should
 * therefore be "immune" to character set differences - so no charset
 * parameter is needed or provided. If a third parameter is passed, it will
 * be silently ignored.
 *
 * In the OUTPUT there is a minor difference in that we use '&#39;' instead
 * of PHP's '&#039;' for a single quote: this provides compatibility with
 * get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
 * (see comment by mikiwoz at yahoo dot co dot uk on
 * http://php.net/htmlspecialchars); it also matches the entity definition
 * for XML 1.0
 * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
 * Like PHP we use a numeric character reference instead of '&apos;' for the
 * single quote. For the other special characters we use the named entity
 * references, as PHP is doing.
*
 * @author {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
 *
 * @license http://www.gnu.org/copyleft/lgpl.html
 * GNU Lesser General Public License
 * @copyright Copyright 2007, {@link http://wikkawiki.org/CreditsPage
 * Wikka Development Team}
 *
 * @access private
 * @param string $string string to be converted
 * @param integer $quote_style
 * - ENT_COMPAT: escapes &, <, > and double quote (default)
 * - ENT_NOQUOTES: escapes only &, < and >
 * - ENT_QUOTES: escapes &, <, >, double and single quotes
 * @return string converted string
 * @since 1.0.7.18
 */
function hsc($string, $quote_style = ENT_COMPAT) {
    // Base translation table (ENT_COMPAT set). It is static so it is only
    // built once - which also means it must NEVER be modified in place,
    // because any change would leak into every later call.
    static $aTransSpecchar = array(
        '&' => '&amp;',
        '"' => '&quot;',
        '<' => '&lt;',
        '>' => '&gt;',

        //This fix is related to SF#1923020, but has to be applied
        //regardless of actually highlighting symbols.

        //Circumvent a bug with symbol highlighting
        //This is required as ; would produce undesirable side-effects if it
        //was not to be processed as an entity.
        ';' => '<SEMI>', // Force ; to be processed as entity
        '|' => '<PIPE>' // Force | to be processed as entity
    );

    // Work on a per-call copy so the quote style of this call does not
    // change the behaviour of subsequent calls
    $translation = $aTransSpecchar;

    switch ($quote_style) {
        case ENT_NOQUOTES: // don't convert double quotes
            unset($translation['"']);
            break;
        case ENT_QUOTES: // convert single quotes as well
            $translation["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
            break;
    }

    // return translated string
    return strtr($string, $translation);
}

/**
 * Turns a name into something usable as a CSS class name / id by
 * prefixing it with an underscore if it starts with a digit.
 *
 * @param string The raw name (e.g. the language name or the overall class)
 * @return string The name, prefixed with '_' if its first character is numeric
 * @access private
 */
function _genCSSName($name){
    $name = (string) $name;

    // An empty name has no first character; isset() avoids the
    // "uninitialized string offset" warning and leaves the name untouched
    $needs_prefix = isset($name[0]) && is_numeric($name[0]);

    return ($needs_prefix ? '_' : '') . $name;
}

/**
 * Returns a stylesheet for the highlighted code.
If $economy mode
 * is true, we only return the stylesheet declarations that matter for
 * this code block instead of the whole thing
 *
 * @param boolean Whether to use economy mode or not
 * @return string A stylesheet built on the data for the current language
 * @since 1.0.0
 */
function get_stylesheet($economy_mode = true) {
    // If there's an error, chances are that the language file
    // won't have populated the language data file, so we can't
    // risk getting a stylesheet...
    if ($this->error) {
        return '';
    }

    // Check if the style rearrangements have been processed ...
    // This also does some preprocessing to check which style groups are usable ...
    if(!isset($this->language_data['NUMBERS_CACHE'])) {
        $this->build_style_cache();
    }

    // First, work out what the selector should be. If there's an ID,
    // that should be used, the same for a class. Otherwise, a selector
    // of '' means that these styles will be applied anywhere
    if ($this->overall_id) {
        $selector = '#' . $this->_genCSSName($this->overall_id);
    } else {
        $selector = '.' . $this->_genCSSName($this->language);
        if ($this->overall_class) {
            $selector .= '.' . $this->_genCSSName($this->overall_class);
        }
    }
    // Trailing blank: every rule below is written as a descendant selector
    $selector .= ' ';

    // Header of the stylesheet (verbose banner only outside economy mode)
    if (!$economy_mode) {
        $stylesheet = "/**\n".
            " * GeSHi Dynamically Generated Stylesheet\n".
            " * --------------------------------------\n".
            " * Dynamically generated stylesheet for {$this->language}\n".
            " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n".
            " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
            " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
            " * --------------------------------------\n".
            " */\n";
    } else {
        $stylesheet = "/**\n".
            " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
            " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
            " */\n";
    }

    // Set the <ol> to have no effect at all if there are line numbers
    // (<ol>s have margins that should be destroyed so all layout is
    // controlled by the set_overall_style method, which works on the
    // <pre> or <div> container). Additionally, set default styles for lines
    if (!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
        //$stylesheet .= "$selector, {$selector}ol, {$selector}ol li {margin: 0;}\n";
        $stylesheet .= "$selector.de1, $selector.de2 {{$this->code_style}}\n";
    }

    // Add overall styles
    // note: neglect economy_mode, empty styles are meaningless
    if ($this->overall_style != '') {
        $stylesheet .= "$selector {{$this->overall_style}}\n";
    }

    // Add styles for links
    // note: economy mode does not make _any_ sense here
    //       either the style is empty and thus no selector is needed
    //       or the appropriate key is given.
    foreach ($this->link_styles as $key => $style) {
        if ($style != '') {
            switch ($key) {
                case GESHI_LINK:
                    $stylesheet .= "{$selector}a:link {{$style}}\n";
                    break;
                case GESHI_HOVER:
                    $stylesheet .= "{$selector}a:hover {{$style}}\n";
                    break;
                case GESHI_ACTIVE:
                    $stylesheet .= "{$selector}a:active {{$style}}\n";
                    break;
                case GESHI_VISITED:
                    $stylesheet .= "{$selector}a:visited {{$style}}\n";
                    break;
            }
        }
    }

    // Header and footer
    // note: neglect economy_mode, empty styles are meaningless
    if ($this->header_content_style != '') {
        $stylesheet .= "$selector.head {{$this->header_content_style}}\n";
    }
    if ($this->footer_content_style != '') {
        $stylesheet .= "$selector.foot {{$this->footer_content_style}}\n";
    }

    // Styles for important stuff
    // note: neglect economy_mode, empty styles are meaningless
    if ($this->important_styles != '') {
        $stylesheet .= "$selector.imp {{$this->important_styles}}\n";
    }

    // Simple line number styles
    if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->line_style1 != '') {
        $stylesheet .= "{$selector}li, {$selector}.li1 {{$this->line_style1}}\n";
    }
    // Style of the line number column (class "ln")
    if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->table_linenumber_style != '') {
        $stylesheet .= "{$selector}.ln {{$this->table_linenumber_style}}\n";
    }
    // If there is a style set for fancy line numbers, echo it out
    if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
        $stylesheet .= "{$selector}.li2 {{$this->line_style2}}\n";
    }

    // One rule per style group of each lexic type. In economy mode a rule is
    // only emitted if the corresponding lexic permission is switched on.
    // note: empty styles are meaningless
    foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode ||
            (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
            $this->lexic_permissions['KEYWORDS'][$group]))) {
            $stylesheet .= "$selector.kw$group {{$styles}}\n";
        }
    }
    // Comment groups are also emitted when a COMMENT_REGEXP entry exists for the group
    foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode ||
            (isset($this->lexic_permissions['COMMENTS'][$group]) &&
            $this->lexic_permissions['COMMENTS'][$group]) ||
            (!empty($this->language_data['COMMENT_REGEXP']) &&
            !empty($this->language_data['COMMENT_REGEXP'][$group])))) {
            $stylesheet .= "$selector.co$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['ESCAPE_CHAR'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode || $this->lexic_permissions['ESCAPE_CHAR'])) {
            // NEW: since 1.0.8 we have to handle hardescapes
            // (the 'HARD' group is written as class "es_h")
            if ($group === 'HARD') {
                $group = '_h';
            }
            $stylesheet .= "$selector.es$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['BRACKETS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode || $this->lexic_permissions['BRACKETS'])) {
            $stylesheet .= "$selector.br$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['SYMBOLS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode || $this->lexic_permissions['SYMBOLS'])) {
            $stylesheet .= "$selector.sy$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['STRINGS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode || $this->lexic_permissions['STRINGS'])) {
            // NEW: since 1.0.8 we have to handle hardquotes
            // (the 'HARD' group is written as class "st_h")
            if ($group === 'HARD') {
                $group = '_h';
            }
            $stylesheet .= "$selector.st$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['NUMBERS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode || $this->lexic_permissions['NUMBERS'])) {
            $stylesheet .= "$selector.nu$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['METHODS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode || $this->lexic_permissions['METHODS'])) {
            $stylesheet .= "$selector.me$group {{$styles}}\n";
        }
    }
    // note: neglect economy_mode, empty styles are meaningless
    foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
        if ($styles != '') {
            $stylesheet .= "$selector.sc$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode ||
            (isset($this->lexic_permissions['REGEXPS'][$group]) &&
            $this->lexic_permissions['REGEXPS'][$group]))) {
            // A regexp definition may carry its own class name (GESHI_CLASS key);
            // otherwise the generic "re<group>" class is used
            if (is_array($this->language_data['REGEXPS'][$group]) &&
                array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$group])) {
                $stylesheet .= "$selector.";
                $stylesheet .= $this->language_data['REGEXPS'][$group][GESHI_CLASS];
                $stylesheet .= " {{$styles}}\n";
            } else {
                $stylesheet .= "$selector.re$group {{$styles}}\n";
            }
        }
    }
    // Styles for lines being highlighted extra.
    // In economy mode the generic "ln-xtra" rule is only emitted when the number
    // of extra lines differs from the number of line-specific styles.
    if (!$economy_mode || (count($this->highlight_extra_lines)!=count($this->highlight_extra_lines_styles))) {
        $stylesheet .= "{$selector}.ln-xtra, {$selector}li.ln-xtra, {$selector}div.ln-xtra {{$this->highlight_extra_lines_style}}\n";
    }
    $stylesheet .= "{$selector}span.xtra { display:block; }\n";
    // One "lx<line>" rule per line that has its own style
    foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
        $stylesheet .= "{$selector}.lx$lineid, {$selector}li.lx$lineid, {$selector}div.lx$lineid {{$linestyle}}\n";
    }

    return $stylesheet;
}

/**
 * Gets the style that is used for the specified line.
 *
 * Returns the style registered for that line in
 * $this->highlight_extra_lines_styles if there is one, otherwise the
 * general $this->highlight_extra_lines_style.
 *
 * @param int The line number information is requested for
 * @return mixed The style stored for that line, or the general extra line style
 * @access private
 * @since 1.0.7.21
 */
function get_line_style($line) {
    // Default; always overwritten by one of the two branches below
    $style = null;
    if (isset($this->highlight_extra_lines_styles[$line])) {
        $style = $this->highlight_extra_lines_styles[$line];
    } else { // if no "extra" style assigned
        $style = $this->highlight_extra_lines_style;
    }

    return $style;
}

/**
 * This function creates an optimized regular expression list
 * of an array of strings.
 *
 * The strings are sorted and merged by common prefixes; the result is
 * split into several regular expressions when it grows too long
 * (GESHI_MAX_PCRE_LENGTH) or contains too many subpatterns
 * (GESHI_MAX_PCRE_SUBPATTERNS).
 *
 * Example (conceptually):
 * <code>$list = array('faa', 'foo', 'foobar');
 * => 'f(aa|oo(bar)?)'</code>
 * The code builds its groups with '(?:' and returns the expressions in an array.
 *
 * @param $list array of (unquoted) strings
 * @param $regexp_delimiter your regular expression delimiter, @see preg_quote()
 * @return array list of regular expression strings
 * @author Milian Wolff <mail@milianw.de>
 * @since 1.0.8
 * @access private
 */
function optimize_regexp_list($list, $regexp_delimiter = '/') {
    // characters a split key must not start with (see the check further down)
    $regex_chars = array('.', '\\', '+', '-', '*', '?', '[', '^', ']', '$',
        '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
    sort($list);
    $regexp_list = array('');
    $num_subpatterns = 0;
    $list_key = 0;

    // the tokens which we will use to generate the regexp list
    $tokens = array();
    // the chain of keys (one per nesting level) under which the previous entry was stored
    $prev_keys = array();
    // go through all entries of the list and generate the token list
    $cur_len = 0;
    for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
        if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
            // seems like the length of this pcre is growing exorbitantly
            $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
            $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
            $tokens = array();
            $cur_len = 0;
        }
        $level = 0;
        $entry = preg_quote((string) $list[$i], $regexp_delimiter);
        // $pointer walks down the token tree by reference
        $pointer = &$tokens;
        // properly assign the new entry to the correct position in the token array
        // possibly generate smaller common denominator keys
        while (true) {
            // get the common denominator
            if (isset($prev_keys[$level])) {
                if ($prev_keys[$level] == $entry) {
                    // this is a duplicate entry, skip it
                    // (continues the outer for loop, so unset($list[$i]) is not reached)
                    continue 2;
                }
                // count the leading characters shared with the previous key
                $char = 0;
                while (isset($entry[$char]) && isset($prev_keys[$level][$char])
                        && $entry[$char] == $prev_keys[$level][$char]) {
                    ++$char;
                }
                if ($char > 0) {
                    // this entry has at least some chars in common with the current key
                    if ($char == strlen($prev_keys[$level])) {
                        // current key is totally matched, i.e. this entry has just some bits appended
                        $pointer = &$pointer[$prev_keys[$level]];
                    } else {
                        // only part of the keys match
                        $new_key_part1 = substr($prev_keys[$level], 0, $char);
                        $new_key_part2 = substr($prev_keys[$level], $char);

                        if (in_array($new_key_part1[0], $regex_chars)
                            || in_array($new_key_part2[0], $regex_chars)) {
                            // this is bad, a regex char as first character
                            // -> store the entry unsplit; the next pass of the while loop
                            //    then finds it as $prev_keys[$level] and leaves via "continue 2"
                            $pointer[$entry] = array('' => true);
                            array_splice($prev_keys, $level, count($prev_keys), $entry);
                            $cur_len += strlen($entry);
                            continue;
                        } else {
                            // relocate previous tokens
                            $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
                            unset($pointer[$prev_keys[$level]]);
                            $pointer = &$pointer[$new_key_part1];
                            // recreate key index
                            array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
                            $cur_len += strlen($new_key_part2);
                        }
                    }
                    // descend one level and go on with the unmatched rest of the entry
                    ++$level;
                    $entry = substr($entry, $char);
                    continue;
                }
                // else: fall through, i.e. no common denominator was found
            }
            if ($level == 0 && !empty($tokens)) {
                // we can dump current tokens into the string and throw them away afterwards
                $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
                $new_subpatterns = substr_count($new_entry, '(?:');
                if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
                    // too many subpatterns: start a new regular expression
                    $regexp_list[++$list_key] = $new_entry;
                    $num_subpatterns = $new_subpatterns;
                } else {
                    // append as a further alternative to the current regular expression
                    if (!empty($regexp_list[$list_key])) {
                        $new_entry = '|' . $new_entry;
                    }
                    $regexp_list[$list_key] .= $new_entry;
                    $num_subpatterns += $new_subpatterns;
                }
                $tokens = array();
                $cur_len = 0;
            }
            // no further common denominator found
            // (the '' => true element marks that a complete entry ends at this key)
            $pointer[$entry] = array('' => true);
            array_splice($prev_keys, $level, count($prev_keys), $entry);

            $cur_len += strlen($entry);
            break;
        }
        unset($list[$i]);
    }
    // make sure the last tokens get converted as well
    $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
    if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
        if ( !empty($regexp_list[$list_key]) ) {
            ++$list_key;
        }
        $regexp_list[$list_key] = $new_entry;
    } else {
        if (!empty($regexp_list[$list_key])) {
            $new_entry = '|' . $new_entry;
        }
        $regexp_list[$list_key] .= $new_entry;
    }
    return $regexp_list;
}
/**
 * this function creates the appropriate regexp string of an token array
 * you should not call this function directly, @see $this->optimize_regexp_list().
*
     * @param array &$tokens Token tree: every key is a pattern fragment and every value
     *                       is the array of fragments that may follow it. An empty-string
     *                       key among the followers means the fragment may also end there.
     * @param bool $recursed Whether this is a nested call; nested calls skip the
     *                       simplification pass at the end
     * @return string The alternatives joined by "|" (an empty string for an empty tree)
     * @author Milian Wolff <mail@milianw.de>
     * @since 1.0.8
     * @access private
     */
    function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
        // Collect the alternatives and join them once at the end. Appending "|" after
        // every entry and cutting the last character off with substr() yields false
        // instead of '' for an empty tree on PHP versions before 8.0.
        $alternatives = array();
        foreach ($tokens as $token => $sub_tokens) {
            // numeric-looking keys arrive as integers, so cast before concatenating
            $alternative = (string) $token;
            $close_entry = isset($sub_tokens['']);
            // $sub_tokens is the loop's own copy; the caller's tree is left untouched
            unset($sub_tokens['']);
            if (!empty($sub_tokens)) {
                $alternative .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
                if ($close_entry) {
                    // the token is a complete entry by itself: make sub_tokens optional
                    $alternative .= '?';
                }
            }
            $alternatives[] = $alternative;
        }
        $list = implode('|', $alternatives);

        if (!$recursed) {
            // Simplification pass. Both rewrites are optional, so when PCRE reports an
            // error (null result) the unoptimised list is kept instead of being lost.
            // Note: a further "common trailing strings" optimisation was marked as
            // buggy in the original code and is deliberately not applied.

            // (?:p)? => p?
            $simplified = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
            if ($simplified !== null) {
                $list = $simplified;
            }

            // (?:a|b|c|d|...) => [abcd...]
            // TODO: a|bb|c => [ac]|bb
            $simplified = preg_replace_callback(
                '#\(\?\:((?:.\|)+.)\)#',
                function ($matches) {
                    return "[" . str_replace("|", "", $matches[1]) . "]";
                },
                $list
            );
            if ($simplified !== null) {
                $list = $simplified;
            }
        }

        return $list;
    }
} // End Class GeSHi


if (!function_exists('geshi_highlight')) {
    /**
     * Easy way to highlight stuff. Behaves just like highlight_string
     *
     * @param string The code to highlight
     * @param string The language to highlight the code in
     * @param string The path to the language files.
You can leave this blank;
     *               as of version 1.0.7 the path should be automatically detected
     * @param boolean Whether to return the result or to echo
     * @return string|bool The highlighted code wrapped in <code> tags if $return is true.
     *                     Otherwise the markup is echoed and the result is false when the
     *                     highlighter's error() is truthy, true if it is not.
     * @since 1.0.2
     */
    function geshi_highlight($string, $language, $path = null, $return = false) {
        $highlighter = new GeSHi($string, $language, $path);
        // no surrounding header element; the markup is wrapped in <code> below
        $highlighter->set_header_type(GESHI_HEADER_NONE);

        // parse exactly once, whichever way the result is delivered
        $markup = '<code>' . $highlighter->parse_code() . '</code>';
        if ($return) {
            return $markup;
        }

        echo $markup;

        // error state is checked only after the output has been written
        return !$highlighter->error();
    }
}

?>
Download geshi/geshi.php
History Sun, 9 Dec 2018 23:32:58 +0100 Jan Dankert Fix: Geshi PHP7-fähig Thu, 20 Oct 2016 00:06:06 +0200 Jan Dankert Aktuelle Geshi-Version installiert. Fri, 23 Nov 2007 00:12:00 +0100 dankert Farbige Darstellung von Code-Blöcken mit Hilfe der Bibliothek GESHI.