openrat-cms

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

geshi.php (204612B)


      1 <?php
      2 /**
      3  * GeSHi - Generic Syntax Highlighter
      4  *
      5  * The GeSHi class for Generic Syntax Highlighting. Please refer to the
      6  * documentation at http://qbnz.com/highlighter/documentation.php for more
      7  * information about how to use this class.
      8  *
      9  * For changes, release notes, TODOs etc, see the relevant files in the docs/
     10  * directory.
     11  *
     12  *   This file is part of GeSHi.
     13  *
     14  *  GeSHi is free software; you can redistribute it and/or modify
     15  *  it under the terms of the GNU General Public License as published by
     16  *  the Free Software Foundation; either version 2 of the License, or
     17  *  (at your option) any later version.
     18  *
     19  *  GeSHi is distributed in the hope that it will be useful,
     20  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     21  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     22  *  GNU General Public License for more details.
     23  *
     24  *  You should have received a copy of the GNU General Public License
     25  *  along with GeSHi; if not, write to the Free Software
     26  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     27  *
     28  * @package    geshi
     29  * @subpackage core
     30  * @author     Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
     31  * @copyright  (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
     32  * @license    http://gnu.org/copyleft/gpl.html GNU GPL
     33  *
     34  */
     35 
     36 //
     37 // GeSHi Constants
     38 // You should use these constant names in your programs instead of
     39 // their values - you never know when a value may change in a future
     40 // version
     41 //
     42 
     43 /** The version of this GeSHi file */
     44 define('GESHI_VERSION', '1.0.8.11');
     45 
     46 // Define the root directory for the GeSHi code tree
     47 if (!defined('GESHI_ROOT')) {
     48     /** The root directory for GeSHi */
     49     define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
     50 }
     51 /** The language file directory for GeSHi
     52     @access private */
     53 define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
     54 
     55 // Define if GeSHi should be paranoid about security
     56 if (!defined('GESHI_SECURITY_PARANOID')) {
     57     /** Tells GeSHi to be paranoid about security settings */
     58     define('GESHI_SECURITY_PARANOID', false);
     59 }
     60 
     61 // Line numbers - use with enable_line_numbers()
     62 /** Use no line numbers when building the result */
     63 define('GESHI_NO_LINE_NUMBERS', 0);
     64 /** Use normal line numbers when building the result */
     65 define('GESHI_NORMAL_LINE_NUMBERS', 1);
     66 /** Use fancy line numbers when building the result */
     67 define('GESHI_FANCY_LINE_NUMBERS', 2);
     68 
     69 // Container HTML type
     70 /** Use nothing to surround the source */
     71 define('GESHI_HEADER_NONE', 0);
     72 /** Use a "div" to surround the source */
     73 define('GESHI_HEADER_DIV', 1);
     74 /** Use a "pre" to surround the source */
     75 define('GESHI_HEADER_PRE', 2);
     76 /** Use a pre to wrap lines when line numbers are enabled or to wrap the whole code. */
     77 define('GESHI_HEADER_PRE_VALID', 3);
     78 /**
     79  * Use a "table" to surround the source:
     80  *
     81  *  <table>
     82  *    <thead><tr><td colspan="2">$header</td></tr></thead>
     83  *    <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
     84  *    <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
     85  *  </table>
     86  *
     87  * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
     88  * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
     89  * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
     90  */
     91 define('GESHI_HEADER_PRE_TABLE', 4);
     92 
     93 // Capitalisation constants
     94 /** Leave keywords found as the case that they are */
     95 define('GESHI_CAPS_NO_CHANGE', 0);
     96 /** Uppercase keywords found */
     97 define('GESHI_CAPS_UPPER', 1);
     98 /** Lowercase keywords found */
     99 define('GESHI_CAPS_LOWER', 2);
    100 
    101 // Link style constants
    102 /** Links in the source in the :link state */
    103 define('GESHI_LINK', 0);
    104 /** Links in the source in the :hover state */
    105 define('GESHI_HOVER', 1);
    106 /** Links in the source in the :active state */
    107 define('GESHI_ACTIVE', 2);
    108 /** Links in the source in the :visited state */
    109 define('GESHI_VISITED', 3);
    110 
    111 // Important string starter/finisher
    112 // Note that if you change these, they should be as-is: i.e., don't
    113 // write them as if they had been run through htmlentities()
    114 /** The starter for important parts of the source */
    115 define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
    116 /** The ender for important parts of the source */
    117 define('GESHI_END_IMPORTANT', '<END GeSHi>');
    118 
    119 /**#@+
    120  *  @access private
    121  */
    122 // When strict mode applies for a language
    123 /** Strict mode never applies (this is the most common) */
    124 define('GESHI_NEVER', 0);
    125 /** Strict mode *might* apply, and can be enabled or
    126     disabled by {@link GeSHi->enable_strict_mode()} */
    127 define('GESHI_MAYBE', 1);
    128 /** Strict mode always applies */
    129 define('GESHI_ALWAYS', 2);
    130 
    131 // Advanced regexp handling constants, used in language files
    132 /** The key of the regex array defining what to search for */
    133 define('GESHI_SEARCH', 0);
    134 /** The key of the regex array defining what bracket group in a
    135     matched search to use as a replacement */
    136 define('GESHI_REPLACE', 1);
    137 /** The key of the regex array defining any modifiers to the regular expression */
    138 define('GESHI_MODIFIERS', 2);
    139 /** The key of the regex array defining what bracket group in a
    140     matched search to put before the replacement */
    141 define('GESHI_BEFORE', 3);
    142 /** The key of the regex array defining what bracket group in a
    143     matched search to put after the replacement */
    144 define('GESHI_AFTER', 4);
    145 /** The key of the regex array defining a custom keyword to use
    146     for this regexp's html tag class */
    147 define('GESHI_CLASS', 5);
    148 
    149 /** Used in language files to mark comments */
    150 define('GESHI_COMMENTS', 0);
    151 
    152 /** Used to work around missing PHP features **/
    153 define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION, '4.3.3') === 1));
    154 
    155 /** make sure we can call stripos **/
    156 if (!function_exists('stripos')) {
    157     // the offset param of preg_match is not supported below PHP 4.3.3
    158     if (GESHI_PHP_PRE_433) {
    159         /**
    160          * @ignore
    161          */
    162         function stripos($haystack, $needle, $offset = null) {
    163             if (!is_null($offset)) {
    164                 $haystack = substr($haystack, $offset);
    165             }
    166             if (preg_match('/'. preg_quote($needle, '/') . '/', $haystack, $match, PREG_OFFSET_CAPTURE)) {
    167                 return $match[0][1];
    168             }
    169             return false;
    170         }
    171     }
    172     else {
    173         /**
    174          * @ignore
    175          */
    176         function stripos($haystack, $needle, $offset = null) {
    177             if (preg_match('/'. preg_quote($needle, '/') . '/', $haystack, $match, PREG_OFFSET_CAPTURE, $offset)) {
    178                 return $match[0][1];
    179             }
    180             return false;
    181         }
    182     }
    183 }
    184 
    185 /** some old PHP / PCRE subpatterns only support up to xxx subpatterns in
    186     regular expressions. Set this to false if your PCRE lib is up to date
    187     @see GeSHi->optimize_regexp_list()
    188     **/
    189 define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
    190 /** it's also important not to generate too long regular expressions
    191     be generous here... but keep in mind, that when reaching this limit we
    192     still have to close open patterns. 12k should do just fine on a 16k limit.
    193     @see GeSHi->optimize_regexp_list()
    194     **/
    195 define('GESHI_MAX_PCRE_LENGTH', 12288);
    196 
    197 //Number format specification
    198 /** Basic number format for integers */
    199 define('GESHI_NUMBER_INT_BASIC', 1);        //Default integers \d+
    200 /** Enhanced number format for integers like seen in C */
    201 define('GESHI_NUMBER_INT_CSTYLE', 2);       //Default C-Style \d+[lL]?
    202 /** Number format to highlight binary numbers with a suffix "b" */
    203 define('GESHI_NUMBER_BIN_SUFFIX', 16);           //[01]+[bB]
    204 /** Number format to highlight binary numbers with a prefix % */
    205 define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32);   //%[01]+
    206 /** Number format to highlight binary numbers with a prefix 0b (C) */
    207 define('GESHI_NUMBER_BIN_PREFIX_0B', 64);        //0b[01]+
    208 /** Number format to highlight octal numbers with a leading zero */
    209 define('GESHI_NUMBER_OCT_PREFIX', 256);           //0[0-7]+
    210 /** Number format to highlight octal numbers with a prefix 0o (logtalk) */
    211 define('GESHI_NUMBER_OCT_PREFIX_0O', 512);           //0o[0-7]+
    212 /** Number format to highlight octal numbers with a leading @ (Used in HiSofts Devpac series). */
    213 define('GESHI_NUMBER_OCT_PREFIX_AT', 1024);           //@[0-7]+
    214 /** Number format to highlight octal numbers with a suffix of o */
    215 define('GESHI_NUMBER_OCT_SUFFIX', 2048);           //[0-7]+[oO]
    216 /** Number format to highlight hex numbers with a prefix 0x */
    217 define('GESHI_NUMBER_HEX_PREFIX', 4096);           //0x[0-9a-fA-F]+
    218 /** Number format to highlight hex numbers with a prefix $ */
    219 define('GESHI_NUMBER_HEX_PREFIX_DOLLAR', 8192);           //$[0-9a-fA-F]+
    220 /** Number format to highlight hex numbers with a suffix of h */
    221 define('GESHI_NUMBER_HEX_SUFFIX', 16384);           //[0-9][0-9a-fA-F]*h
    222 /** Number format to highlight floating-point numbers without support for scientific notation */
    223 define('GESHI_NUMBER_FLT_NONSCI', 65536);          //\d+\.\d+
    224 /** Number format to highlight floating-point numbers with an "f" suffix, without support for scientific notation */
    225 define('GESHI_NUMBER_FLT_NONSCI_F', 131072);       //\d+(\.\d+)?f
    226 /** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
    227 define('GESHI_NUMBER_FLT_SCI_SHORT', 262144);      //\.\d+e\d+
    228 /** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
    229 define('GESHI_NUMBER_FLT_SCI_ZERO', 524288);       //\d+(\.\d+)?e\d+
    230 //Custom formats are passed by RX array
    231 
    232 // Error detection - use these to analyse faults
    233 /** No sourcecode to highlight was specified
    234  * @deprecated
    235  */
    236 define('GESHI_ERROR_NO_INPUT', 1);
    237 /** The language specified does not exist */
    238 define('GESHI_ERROR_NO_SUCH_LANG', 2);
    239 /** GeSHi could not open a file for reading (generally a language file) */
    240 define('GESHI_ERROR_FILE_NOT_READABLE', 3);
    241 /** The header type passed to {@link GeSHi->set_header_type()} was invalid */
    242 define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
    243 /** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
    244 define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
    245 /**#@-*/
    246 
    247 
    248 /**
    249  * The GeSHi Class.
    250  *
    251  * Please refer to the documentation for GeSHi 1.0.X that is available
    252  * at http://qbnz.com/highlighter/documentation.php for more information
    253  * about how to use this class.
    254  *
    255  * @package   geshi
    256  * @author    Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
    257  * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
    258  */
    259 class GeSHi {
    260     /**#@+
    261      * @access private
    262      */
    263     /**
    264      * The source code to highlight
    265      * @var string
    266      */
    267     var $source = '';
    268 
    269     /**
    270      * The language to use when highlighting
    271      * @var string
    272      */
    273     var $language = '';
    274 
    275     /**
    276      * The data for the language used
    277      * @var array
    278      */
    279     var $language_data = array();
    280 
    281     /**
    282      * The path to the language files
    283      * @var string
    284      */
    285     var $language_path = GESHI_LANG_ROOT;
    286 
    287     /**
    288      * The error message associated with an error
    289      * @var string
    290      * @todo check err reporting works
    291      */
    292     var $error = false;
    293 
    294     /**
    295      * Possible error messages
    296      * @var array
    297      */
    298     var $error_messages = array(
    299         GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
    300         GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
    301         GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
    302         GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
    303     );
    304 
    305     /**
    306      * Whether highlighting is strict or not
    307      * @var boolean
    308      */
    309     var $strict_mode = false;
    310 
    311     /**
    312      * Whether to use CSS classes in output
    313      * @var boolean
    314      */
    315     var $use_classes = false;
    316 
    317     /**
    318      * The type of header to use. Can be one of the following
    319      * values:
    320      *
    321      * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
    322      * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
    323      * - GESHI_HEADER_NONE: No header is outputted.
    324      *
    325      * @var int
    326      */
    327     var $header_type = GESHI_HEADER_PRE;
    328 
    329     /**
    330      * Array of permissions for which lexics should be highlighted
    331      * @var array
    332      */
    333     var $lexic_permissions = array(
    334         'KEYWORDS' =>    array(),
    335         'COMMENTS' =>    array('MULTI' => true),
    336         'REGEXPS' =>     array(),
    337         'ESCAPE_CHAR' => true,
    338         'BRACKETS' =>    true,
    339         'SYMBOLS' =>     false,
    340         'STRINGS' =>     true,
    341         'NUMBERS' =>     true,
    342         'METHODS' =>     true,
    343         'SCRIPT' =>      true
    344     );
    345 
    346     /**
    347      * The time it took to parse the code
    348      * @var double
    349      */
    350     var $time = 0;
    351 
    352     /**
    353      * The content of the header block
    354      * @var string
    355      */
    356     var $header_content = '';
    357 
    358     /**
    359      * The content of the footer block
    360      * @var string
    361      */
    362     var $footer_content = '';
    363 
    364     /**
    365      * The style of the header block
    366      * @var string
    367      */
    368     var $header_content_style = '';
    369 
    370     /**
    371      * The style of the footer block
    372      * @var string
    373      */
    374     var $footer_content_style = '';
    375 
    376     /**
    377      * Tells if a block around the highlighted source should be forced
    378      * if not using line numbering
    379      * @var boolean
    380      */
    381     var $force_code_block = false;
    382 
    383     /**
    384      * The styles for hyperlinks in the code
    385      * @var array
    386      */
    387     var $link_styles = array();
    388 
    389     /**
    390      * Whether important blocks should be recognised or not
    391      * @var boolean
    392      * @deprecated
    393      * @todo REMOVE THIS FUNCTIONALITY!
    394      */
    395     var $enable_important_blocks = false;
    396 
    397     /**
    398      * Styles for important parts of the code
    399      * @var string
    400      * @deprecated
    401      * @todo As above - rethink the whole idea of important blocks as it is buggy and
    402      * will be hard to implement in 1.2
    403      */
    404     var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
    405 
    406     /**
    407      * Whether CSS IDs should be added to the code
    408      * @var boolean
    409      */
    410     var $add_ids = false;
    411 
    412     /**
    413      * Lines that should be highlighted extra
    414      * @var array
    415      */
    416     var $highlight_extra_lines = array();
    417 
    418     /**
    419      * Styles of lines that should be highlighted extra
    420      * @var array
    421      */
    422     var $highlight_extra_lines_styles = array();
    423 
    424     /**
    425      * Styles of extra-highlighted lines
    426      * @var string
    427      */
    428     var $highlight_extra_lines_style = 'background-color: #ffc;';
    429 
    430     /**
    431      * The line ending
    432      * If null, nl2br() will be used on the result string.
    433      * Otherwise, all instances of \n will be replaced with $line_ending
    434      * @var string
    435      */
    436     var $line_ending = null;
    437 
    438     /**
    439      * Number at which line numbers should start at
    440      * @var int
    441      */
    442     var $line_numbers_start = 1;
    443 
    444     /**
    445      * The overall style for this code block
    446      * @var string
    447      */
    448     var $overall_style = 'font-family:monospace;';
    449 
    450     /**
    451      *  The style for the actual code
    452      * @var string
    453      */
    454     var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
    455 
    456     /**
    457      * The overall class for this code block
    458      * @var string
    459      */
    460     var $overall_class = '';
    461 
    462     /**
    463      * The overall ID for this code block
    464      * @var string
    465      */
    466     var $overall_id = '';
    467 
    468     /**
    469      * Line number styles
    470      * @var string
    471      */
    472     var $line_style1 = 'font-weight: normal; vertical-align:top;';
    473 
    474     /**
    475      * Line number styles for fancy lines
    476      * @var string
    477      */
    478     var $line_style2 = 'font-weight: bold; vertical-align:top;';
    479 
    480     /**
    481      * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
    482      * @var string
    483      */
    484     var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
    485 
    486     /**
    487      * Flag for how line numbers are displayed
    488      * @var int
    489      */
    490     var $line_numbers = GESHI_NO_LINE_NUMBERS;
    491 
    492     /**
    493      * Flag to decide if multi line spans are allowed. Set it to false to make sure
    494      * each tag is closed before and reopened after each linefeed.
    495      * @var boolean
    496      */
    497     var $allow_multiline_span = true;
    498 
    499     /**
    500      * The "nth" value for fancy line highlighting
    501      * @var int
    502      */
    503     var $line_nth_row = 0;
    504 
    505     /**
    506      * The size of tab stops
    507      * @var int
    508      */
    509     var $tab_width = 8;
    510 
    511     /**
    512      * Should we use language-defined tab stop widths?
    513      * @var boolean
    514      */
    515     var $use_language_tab_width = false;
    516 
    517     /**
    518      * Default target for keyword links
    519      * @var string
    520      */
    521     var $link_target = '';
    522 
    523     /**
    524      * The encoding to use for entity encoding
    525      * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
    526      * @var string
    527      */
    528     var $encoding = 'utf-8';
    529 
    530     /**
    531      * Should keywords be linked?
    532      * @var boolean
    533      */
    534     var $keyword_links = true;
    535 
    536     /**
    537      * Currently loaded language file
    538      * @var string
    539      * @since 1.0.7.22
    540      */
    541     var $loaded_language = '';
    542 
    543     /**
    544      * Whether the caches needed for parsing are built or not
    545      *
    546      * @var bool
    547      * @since 1.0.8
    548      */
    549     var $parse_cache_built = false;
    550 
    551     /**
    552      * Work around for Suhosin Patch with disabled /e modifier
    553      *
    554      * Note from suhosins author in config file:
    555      * <blockquote>
    556      *   The /e modifier inside <code>preg_replace()</code> allows code execution.
    557      *   Often it is the cause for remote code execution exploits. It is wise to
    558      *   deactivate this feature and test where in the application it is used.
    559      *   The developer using the /e modifier should be made aware that he should
    560      *   use <code>preg_replace_callback()</code> instead
    561      * </blockquote>
    562      *
    563      * @var array
    564      * @since 1.0.8
    565      */
    566     var $_kw_replace_group = 0;
    567     var $_rx_key = 0;
    568 
    569     /**
    570      * some "callback parameters" for handle_multiline_regexps
    571      *
    572      * @since 1.0.8
    573      * @access private
    574      * @var string
    575      */
    576     var $_hmr_before = '';
    577     var $_hmr_replace = '';
    578     var $_hmr_after = '';
    579     var $_hmr_key = 0;
    580 
    581     /**#@-*/
    582 
    583     /**
    584      * Creates a new GeSHi object, with source and language
    585      *
    586      * @param string The source code to highlight
    587      * @param string The language to highlight the source with
    588      * @param string The path to the language file directory. <b>This
    589      *               is deprecated!</b> I've backported the auto path
    590      *               detection from the 1.1.X dev branch, so now it
    591      *               should be automatically set correctly. If you have
    592      *               renamed the language directory however, you will
    593      *               still need to set the path using this parameter or
    594      *               {@link GeSHi->set_language_path()}
    595      * @since 1.0.0
    596      */
    function __construct($source = '', $language = '', $path = '') {
        // Compare against '' explicitly instead of using empty(): empty()
        // also rejects the string '0', which is perfectly valid source code.
        if (is_scalar($source) && (string) $source !== '') {
            $this->set_source($source);
        }
        if (is_scalar($language) && (string) $language !== '') {
            $this->set_language($language);
        }
        // An empty path leaves the auto-detected default language path untouched
        $this->set_language_path($path);
    }
    606 
    607     /**
    608      * Returns the version of GeSHi
    609      *
    610      * @return string
    611      * @since 1.0.8.11
    612      */
    function get_version() {
        // The version string is fixed by the GESHI_VERSION constant
        $version = GESHI_VERSION;

        return $version;
    }
    617 
    618     /**
    619      * Returns an error message associated with the last GeSHi operation,
    620      * or false if no error has occurred
    621      *
    622      * @return string|false An error message if there has been an error, else false
    623      * @since  1.0.0
    624      */
    function error() {
        if (!$this->error) {
            return false;
        }

        $code = $this->error;

        // Known error code: expand the message template for that code
        if ((is_int($code) || is_string($code)) && isset($this->error_messages[$code])) {
            //Put some template variables for debugging here ...
            $debug_tpl_vars = array(
                '{LANGUAGE}' => $this->language,
                '{PATH}' => $this->language_path
            );
            $msg = str_replace(
                array_keys($debug_tpl_vars),
                array_values($debug_tpl_vars),
                $this->error_messages[$code]);

            return "<br /><strong>GeSHi Error:</strong> $msg (code {$code})<br />";
        }

        // get_language_fullname() stores a ready-made message string in
        // $this->error instead of a code; report it as-is rather than
        // indexing $error_messages with it.
        if (is_string($code)) {
            return "<br /><strong>GeSHi Error:</strong> $code<br />";
        }

        // A code without a registered message (e.g. GESHI_ERROR_NO_INPUT)
        return "<br /><strong>GeSHi Error:</strong> Unknown error (code {$code})<br />";
    }
    641 
    642     /**
    643      * Gets a human-readable language name (thanks to Simon Patterson
    644      * for the idea :))
    645      *
    646      * @return string The name for the current language
    647      * @since  1.0.2
    648      */
    function get_language_name() {
        // $language_data starts out as an empty array, so LANG_NAME only exists
        // once a language file has been loaded. Fall back to the requested
        // language identifier instead of reading an undefined index.
        $name = isset($this->language_data['LANG_NAME'])
            ? $this->language_data['LANG_NAME']
            : $this->language;

        if (GESHI_ERROR_NO_SUCH_LANG === $this->error) {
            return $name . ' (Unknown Language)';
        }
        return $name;
    }
    655 
    656     /**
    657      * Sets the source code for this object
    658      *
    659      * @param string The source code to highlight
    660      * @since 1.0.0
    661      */
    function set_source($source) {
        // Extra-highlighted line numbers refer to the previous source, so drop them
        $this->highlight_extra_lines = array();

        // Remember the new code to highlight
        $this->source = $source;
    }
    666 
    667     /**
    668      * Sets the language for this object
    669      *
    670      * @note since 1.0.8 this function won't reset language-settings by default anymore!
    671      *       if you need this set $force_reset = true
    672      *
    673      * @param string The name of the language to use
    674      * @since 1.0.0
    675      */
    function set_language($language, $force_reset = false) {
        if ($force_reset) {
            // Forget which file is loaded so the language data is read again
            $this->loaded_language = false;
        }

        // Clean up the language name to prevent malicious code injection:
        // only letters, digits, "-" and "_" survive, folded to lower case.
        $language = strtolower(preg_replace('#[^a-zA-Z0-9\-_]#', '', $language));

        // Retrieve the full filename
        $file_name = $this->language_path . $language . '.php';
        if ($file_name === $this->loaded_language) {
            // This language is already loaded, so there is nothing to read.
            // A failed set_language() call in between may however have left
            // another name and a "no such language" error behind; bring the
            // state back in line with the data that is actually loaded.
            $this->language = $language;
            if (GESHI_ERROR_NO_SUCH_LANG === $this->error) {
                $this->error = false;
            }
            return;
        }

        $this->language = $language;

        $this->error = false;
        $this->strict_mode = GESHI_NEVER;

        // Check if we can read the desired file
        if (!is_readable($file_name)) {
            $this->error = GESHI_ERROR_NO_SUCH_LANG;
            return;
        }

        // Load the language for parsing
        $this->load_language($file_name);
    }
    707 
    708     /**
    709      * Sets the path to the directory containing the language files. Note
    710      * that this path is relative to the directory of the script that included
    711      * geshi.php, NOT geshi.php itself.
    712      *
    713      * @param string The path to the language directory
    714      * @since 1.0.0
    715      * @deprecated The path to the language files should now be automatically
    716      *             detected, so this method should no longer be needed. The
    717      *             1.1.X branch handles manual setting of the path differently
    718      *             so this method will disappear in 1.2.0.
    719      */
    function set_language_path($path) {
        // Security fix to prevent external directories using fopen wrappers.
        // Compare against false explicitly so a colon at offset 0 is seen too.
        if (false !== strpos($path, ':')) {
            // The only acceptable colon is the one after a Windows drive
            // letter ("C:..."); anywhere else, or on other systems, reject.
            if (DIRECTORY_SEPARATOR != "\\"
                || !preg_match('#^[a-zA-Z]:#', $path)
                || false !== strpos($path, ':', 2)) {
                return;
            }
        }

        // Reject any path containing characters outside the allowed set.
        // NOTE(review): in this single-quoted literal "\\\s" reaches PCRE as
        // an escaped backslash followed by a plain "s", so whitespace is NOT
        // in the allowed set - confirm whether that is intended.
        if (preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
            return;
        }

        // In paranoid mode also refuse hidden entries and parent references
        if (GESHI_SECURITY_PARANOID
            && (false !== strpos($path, '/.') || false !== strpos($path, '..'))) {
            return;
        }

        if ($path) {
            // Make sure the stored path ends in a slash
            $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';

            // Reload the current language from the new location, otherwise
            // set_language_path has no effect. Skip this while no language
            // has been chosen yet: looking up "<path>/.php" would only
            // register a bogus "no such language" error.
            if ('' !== $this->language) {
                $this->set_language($this->language);
            }
        }
    }
    748 
    749     /**
    750      * Get supported langs or an associative array lang=>full_name.
    751      * @param boolean $full_names
    752      * @return array
    753      */
    function get_supported_languages($full_names=false)
    {
        // return array
        $back = array();

        // dir() yields false instead of a Directory object when the path
        // cannot be opened; bail out with an empty list rather than calling
        // read() on a non-object.
        $dir = is_dir($this->language_path) ? dir($this->language_path) : false;
        if (false === $dir) {
            return $back;
        }

        // we walk the lang root
        while (false !== ($entry = $dir->read()))
        {
            // Skip all dirs
            if (is_dir($this->language_path . $entry)) {
                continue;
            }

            // we only want lang.php files
            if (!preg_match('/^([^.]+)\.php$/', $entry, $matches)) {
                continue;
            }

            // Raw lang name is here
            $langname = $matches[1];

            if ($full_names === true)
            {
                // Associative result: lang => full name. Languages whose
                // full name cannot be determined are left out.
                $fullname = $this->get_language_fullname($langname);
                if (false !== $fullname)
                {
                    $back[$langname] = $fullname;
                }
            }
            else
            {
                // just store raw langname
                $back[] = $langname;
            }
        }

        $dir->close();

        return $back;
    }
    799 
    800     /**
    801      * Get full_name for a lang or false.
    802      * @param string $language short langname (html4strict for example)
    803      * @return mixed
    804      */
    function get_language_fullname($language)
    {
        // Clean up the language name to prevent malicious code injection
        $language = strtolower(preg_replace('#[^a-zA-Z0-9\-_]#', '', $language));

        // get fullpath-filename for a langname
        $fullpath = $this->language_path . $language . '.php';

        // The language file is read as text, not included. Test readability
        // first: file_get_contents() on a missing file would emit a warning
        // before returning false.
        if (!is_file($fullpath) || !is_readable($fullpath)
            || false === ($data = file_get_contents($fullpath))) {
            // Note: this stores a message string, not an error code
            $this->error = sprintf('Geshi::get_lang_fullname() Unknown Language: %s', $language);
            return false;
        }

        // Match the single-quoted LANG_NAME value. The value is a run of
        // either a backslash escape (e.g. \') or any character that is
        // neither a quote nor a backslash, so an escaped quote inside the
        // name no longer terminates the match early.
        if (!preg_match('/\'LANG_NAME\'\s*=>\s*\'((?:\\\\.|[^\'\\\\])+)\'/', $data, $matches)) {
            $this->error = sprintf('Geshi::get_lang_fullname(%s): Regex can not detect language', $language);
            return false;
        }

        // return fullname for langname, with the escapes resolved
        return stripcslashes($matches[1]);
    }
    830 
    831     /**
    832      * Sets the type of header to be used.
    833      *
    834      * If GESHI_HEADER_DIV is used, the code is surrounded in a "div". This
    835      * means more source code but more control over tab width and line-wrapping.
    836      * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
    837      * control. Default is GESHI_HEADER_PRE.
    838      *
    839      * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
    840      * should be outputted.
    841      *
    842      * @param int The type of header to be used
    843      * @since 1.0.0
    844      */
    function set_header_type($type) {
        // Header types accepted by this setter
        $valid_types = array(
            GESHI_HEADER_NONE,
            GESHI_HEADER_DIV,
            GESHI_HEADER_PRE,
            GESHI_HEADER_PRE_VALID,
            GESHI_HEADER_PRE_TABLE
        );

        if (in_array($type, $valid_types)) {
            $this->header_type = $type;
        } else {
            // Unknown type: record the error and keep the current header type
            $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
        }
    }
    856 
    857     /**
    858      * Sets the styles for the code that will be outputted
    859      * when this object is parsed. The style should be a
    860      * string of valid stylesheet declarations
    861      *
    862      * @param string  The overall style for the outputted code block
    863      * @param boolean Whether to merge the styles with the current styles or not
    864      * @since 1.0.0
    865      */
    function set_overall_style($style, $preserve_defaults = false) {
        // Merging appends the new declarations to the current ones
        if ($preserve_defaults) {
            $this->overall_style .= $style;
            return;
        }

        // Otherwise the new declarations replace whatever was set before
        $this->overall_style = $style;
    }
    873 
    874     /**
    875      * Sets the overall classname for this block of code. This
    876      * class can then be used in a stylesheet to style this object's
    877      * output
    878      *
    879      * @param string The class name to use for this block of code
    880      * @since 1.0.0
    881      */
    function set_overall_class($class) {
        // Keep the caller-supplied class name on this instance
        $class_name = $class;
        $this->overall_class = $class_name;
    }
    885 
    886     /**
    887      * Sets the overall id for this block of code. This id can then
    888      * be used in a stylesheet to style this object's output
    889      *
    890      * @param string The ID to use for this block of code
    891      * @since 1.0.0
    892      */
    function set_overall_id($id) {
        // Keep the caller-supplied id on this instance
        $block_id = $id;
        $this->overall_id = $block_id;
    }
    896 
    897     /**
    898      * Sets whether CSS classes should be used to highlight the source. Default
    899      * is off, calling this method with no arguments will turn it on
    900      *
    901      * @param boolean Whether to turn classes on or not
    902      * @since 1.0.0
    903      */
    function enable_classes($flag = true) {
        // Any truthy value switches CSS classes on; the stored value is
        // always a real boolean.
        $this->use_classes = (bool) $flag;
    }
    907 
    908     /**
    909      * Sets the style for the actual code. This should be a string
    910      * containing valid stylesheet declarations. If $preserve_defaults is
    911      * true, then styles are merged with the default styles, with the
    912      * user defined styles having priority
    913      *
    914      * Note: Use this method to override any style changes you made to
    915      * the line numbers if you are using line numbers, else the line of
    916      * code will have the same style as the line number! Consult the
    917      * GeSHi documentation for more information about this.
    918      *
    919      * @param string  The style to use for actual code
    920      * @param boolean Whether to merge the current styles with the new styles
    921      * @since 1.0.2
    922      */
    function set_code_style($style, $preserve_defaults = false) {
        // Merging appends the new declarations to the current code style
        if ($preserve_defaults) {
            $this->code_style .= $style;
            return;
        }

        // Otherwise the current code style is replaced
        $this->code_style = $style;
    }
    930 
    931     /**
    932      * Sets the styles for the line numbers.
    933      *
    934      * @param string The style for the line numbers that are "normal"
    935      * @param string|boolean If a string, this is the style of the line
    936      *        numbers that are "fancy", otherwise if boolean then this
    937      *        defines whether the normal styles should be merged with the
    938      *        new normal styles or not
    939      * @param boolean If set, is the flag for whether to merge the "fancy"
    940      *        styles with the current styles or not
    941      * @since 1.0.2
    942      */
    function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
        // Two-argument form set_line_style($style, $merge): a boolean in the
        // second position is the merge flag and no "fancy" style was given.
        if (is_bool($style2)) {
            $preserve_defaults = $style2;
            $style2 = '';
        }

        // Merge: append to both the normal and the fancy line style
        if ($preserve_defaults) {
            $this->line_style1 .= $style1;
            $this->line_style2 .= $style2;
            return;
        }

        // Replace both styles
        $this->line_style1 = $style1;
        $this->line_style2 = $style2;
    }
    959 
    960     /**
    961      * Sets whether line numbers should be displayed.
    962      *
    963      * Valid values for the first parameter are:
    964      *
    965      *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
    966      *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
    967      *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
    968      *
    969      * For fancy line numbers, the second parameter is used to signal which lines
    970      * are to be fancy. For example, if the value of this parameter is 5 then every
    971      * 5th line will be fancy.
    972      *
    973      * @param int How line numbers should be displayed
    974      * @param int Defines which lines are fancy
    975      * @since 1.0.0
    976      */
    function enable_line_numbers($flag, $nth_row = 5) {
        // Is $flag (loosely) one of the three line-number modes?
        $known_mode = (GESHI_NO_LINE_NUMBERS == $flag)
            || (GESHI_NORMAL_LINE_NUMBERS == $flag)
            || (GESHI_FANCY_LINE_NUMBERS == $flag);

        if (!$known_mode) {
            $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
        }

        // Both settings are stored even when the mode was flagged as invalid
        $this->line_nth_row = $nth_row;
        $this->line_numbers = $flag;
    }
    985 
    986     /**
    987      * Sets whether spans and other HTML markup generated by GeSHi can
    988      * span over multiple lines or not. Defaults to true to reduce overhead.
    989      * Set it to false if you want to manipulate the output or manually display
    990      * the code in an ordered list.
    991      *
    992      * @param boolean Whether multiline spans are allowed or not
    993      * @since 1.0.7.22
    994      */
    function enable_multiline_span($flag) {
        // Normalise any truthy/falsy value to a real boolean before storing
        $this->allow_multiline_span = ($flag) ? true : false;
    }
    998 
    999     /**
   1000      * Get current setting for multiline spans, see GeSHi->enable_multiline_span().
   1001      *
   1002      * @see enable_multiline_span
   1003      * @return bool
   1004      */
   function get_multiline_span() {
       // Report the value stored by enable_multiline_span()
       $allowed = $this->allow_multiline_span;

       return $allowed;
   }
   1008 
   1009     /**
   1010      * Sets the style for a keyword group. If $preserve_defaults is
   1011      * true, then styles are merged with the default styles, with the
   1012      * user defined styles having priority
   1013      *
   1014      * @param int     The key of the keyword group to change the styles of
   1015      * @param string  The style to make the keywords
   1016      * @param boolean Whether to merge the new styles with the old or just
   1017      *                to overwrite them
   1018      * @since 1.0.0
   1019      */
   function set_keyword_group_style($key, $style, $preserve_defaults = false) {
       // Append only when merging was requested AND the group already has a
       // style. Appending (.=) to a missing array key raises PHP's undefined
       // index/key diagnostic, so a group without a style is simply assigned.
       if ($preserve_defaults && isset($this->language_data['STYLES']['KEYWORDS'][$key])) {
           $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
       } else {
           $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
       }

       // A group with no highlighting permission yet gets it switched on;
       // an existing on/off choice is left untouched.
       if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
           $this->lexic_permissions['KEYWORDS'][$key] = true;
       }
   }
   1033 
   1034     /**
   1035      * Turns highlighting on/off for a keyword group
   1036      *
   1037      * @param int     The key of the keyword group to turn on or off
   1038      * @param boolean Whether to turn highlighting for that group on or off
   1039      * @since 1.0.0
   1040      */
   function set_keyword_group_highlighting($key, $flag = true) {
       // Store a real boolean as the permission of this keyword group
       $this->lexic_permissions['KEYWORDS'][$key] = (bool) $flag;
   }
   1044 
   1045     /**
   1046      * Sets the styles for comment groups.  If $preserve_defaults is
   1047      * true, then styles are merged with the default styles, with the
   1048      * user defined styles having priority
   1049      *
   1050      * @param int     The key of the comment group to change the styles of
   1051      * @param string  The style to make the comments
   1052      * @param boolean Whether to merge the new styles with the old or just
   1053      *                to overwrite them
   1054      * @since 1.0.0
   1055      */
   function set_comments_style($key, $style, $preserve_defaults = false) {
       // Merge only when asked to AND the comment group already has a style.
       // Appending (.=) to a missing array key raises PHP's undefined
       // index/key diagnostic, so a group without a style is assigned directly.
       if ($preserve_defaults && isset($this->language_data['STYLES']['COMMENTS'][$key])) {
           $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
       } else {
           $this->language_data['STYLES']['COMMENTS'][$key] = $style;
       }
   }
   1063 
   1064     /**
   1065      * Turns highlighting on/off for comment groups
   1066      *
   1067      * @param int     The key of the comment group to turn on or off
   1068      * @param boolean Whether to turn highlighting for that group on or off
   1069      * @since 1.0.0
   1070      */
   function set_comments_highlighting($key, $flag = true) {
       // Store a real boolean as the permission of this comment group
       $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
   }
   1074 
   1075     /**
   1076      * Sets the styles for escaped characters. If $preserve_defaults is
   1077      * true, then styles are merged with the default styles, with the
   1078      * user defined styles having priority
   1079      *
   1080      * @param string  The style to make the escape characters
   1081      * @param boolean Whether to merge the new styles with the old or just
   1082      *                to overwrite them
   1083      * @since 1.0.0
   1084      */
   function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
       // $group selects which escape-character style entry is changed
       // (entry 0 by default).
       // Merge only when asked to AND that entry already exists. Appending
       // (.=) to a missing array key raises PHP's undefined index/key
       // diagnostic, so a missing entry is assigned directly.
       if ($preserve_defaults && isset($this->language_data['STYLES']['ESCAPE_CHAR'][$group])) {
           $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
       } else {
           $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
       }
   }
   1092 
   1093     /**
   1094      * Turns highlighting on/off for escaped characters
   1095      *
   1096      * @param boolean Whether to turn highlighting for escape characters on or off
   1097      * @since 1.0.0
   1098      */
   function set_escape_characters_highlighting($flag = true) {
       // Store a real boolean as the escape-character permission
       $this->lexic_permissions['ESCAPE_CHAR'] = (bool) $flag;
   }
   1102 
   1103     /**
   1104      * Sets the styles for brackets. If $preserve_defaults is
   1105      * true, then styles are merged with the default styles, with the
   1106      * user defined styles having priority
   1107      *
   1108      * This method is DEPRECATED: use set_symbols_style instead.
   1109      * This method will be removed in 1.2.X
   1110      *
   1111      * @param string  The style to make the brackets
   1112      * @param boolean Whether to merge the new styles with the old or just
   1113      *                to overwrite them
   1114      * @since 1.0.0
   1115      * @deprecated In favour of set_symbols_style
   1116      */
   function set_brackets_style($style, $preserve_defaults = false) {
       // Only entry 0 of the bracket styles is ever touched here.
       // Merge only when asked to AND that entry already exists. Appending
       // (.=) to a missing array key raises PHP's undefined index/key
       // diagnostic, so a missing entry is assigned directly.
       if ($preserve_defaults && isset($this->language_data['STYLES']['BRACKETS'][0])) {
           $this->language_data['STYLES']['BRACKETS'][0] .= $style;
       } else {
           $this->language_data['STYLES']['BRACKETS'][0] = $style;
       }
   }
   1124 
   1125     /**
   1126      * Turns highlighting on/off for brackets
   1127      *
   1128      * This method is DEPRECATED: use set_symbols_highlighting instead.
   1129      * This method will be removed in 1.2.X
   1130      *
   1131      * @param boolean Whether to turn highlighting for brackets on or off
   1132      * @since 1.0.0
   1133      * @deprecated In favour of set_symbols_highlighting
   1134      */
   function set_brackets_highlighting($flag) {
       // Store a real boolean as the bracket permission
       $this->lexic_permissions['BRACKETS'] = (bool) $flag;
   }
   1138 
   1139     /**
   1140      * Sets the styles for symbols. If $preserve_defaults is
   1141      * true, then styles are merged with the default styles, with the
   1142      * user defined styles having priority
   1143      *
   1144      * @param string  The style to make the symbols
   1145      * @param boolean Whether to merge the new styles with the old or just
   1146      *                to overwrite them
   1147      * @param int     Tells the group of symbols for which style should be set.
   1148      * @since 1.0.1
   1149      */
   function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
       // Merge only when asked to AND the symbol group already has a style.
       // Appending (.=) to a missing array key raises PHP's undefined
       // index/key diagnostic, so a group without a style is assigned directly.
       if ($preserve_defaults && isset($this->language_data['STYLES']['SYMBOLS'][$group])) {
           $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
       } else {
           $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
       }

       // For backward compatibility, group 0 is mirrored into the
       // (deprecated) bracket style with the same merge flag.
       if (0 == $group) {
           $this->set_brackets_style($style, $preserve_defaults);
       }
   }
   1163 
   1164     /**
   1165      * Turns highlighting on/off for symbols
   1166      *
   1167      * @param boolean Whether to turn highlighting for symbols on or off
   1168      * @since 1.0.0
   1169      */
   function set_symbols_highlighting($flag) {
       // Store a real boolean as the symbol permission
       $enabled = (bool) $flag;
       $this->lexic_permissions['SYMBOLS'] = $enabled;

       // For backward compatibility the bracket permission follows along;
       // the caller's original value is passed on unchanged.
       $this->set_brackets_highlighting($flag);
   }
   1177 
   1178     /**
   1179      * Sets the styles for strings. If $preserve_defaults is
   1180      * true, then styles are merged with the default styles, with the
   1181      * user defined styles having priority
   1182      *
   1183      * @param string  The style to make the strings
   1184      * @param boolean Whether to merge the new styles with the old or just
   1185      *                to overwrite them
   1186      * @param int     Tells the group of strings for which style should be set.
   1187      * @since 1.0.0
   1188      */
   function set_strings_style($style, $preserve_defaults = false, $group = 0) {
       // Merge only when asked to AND the string group already has a style.
       // Appending (.=) to a missing array key raises PHP's undefined
       // index/key diagnostic, so a group without a style is assigned directly.
       if ($preserve_defaults && isset($this->language_data['STYLES']['STRINGS'][$group])) {
           $this->language_data['STYLES']['STRINGS'][$group] .= $style;
       } else {
           $this->language_data['STYLES']['STRINGS'][$group] = $style;
       }
   }
   1196 
   1197     /**
   1198      * Turns highlighting on/off for strings
   1199      *
   1200      * @param boolean Whether to turn highlighting for strings on or off
   1201      * @since 1.0.0
   1202      */
   function set_strings_highlighting($flag) {
       // Store a real boolean as the string permission
       $this->lexic_permissions['STRINGS'] = (bool) $flag;
   }
   1206 
   1207     /**
   1208      * Sets the styles for strict code blocks. If $preserve_defaults is
   1209      * true, then styles are merged with the default styles, with the
   1210      * user defined styles having priority
   1211      *
   1212      * @param string  The style to make the script blocks
   1213      * @param boolean Whether to merge the new styles with the old or just
   1214      *                to overwrite them
   1215      * @param int     Tells the group of script blocks for which style should be set.
   1216      * @since 1.0.8.4
   1217      */
   function set_script_style($style, $preserve_defaults = false, $group = 0) {
       // Merge only when asked to AND the script-block group already has a
       // style. Appending (.=) to a missing array key raises PHP's undefined
       // index/key diagnostic, so a group without a style is assigned directly.
       if ($preserve_defaults && isset($this->language_data['STYLES']['SCRIPT'][$group])) {
           $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
       } else {
           $this->language_data['STYLES']['SCRIPT'][$group] = $style;
       }
   }
   1226 
   1227     /**
   1228      * Sets the styles for numbers. If $preserve_defaults is
   1229      * true, then styles are merged with the default styles, with the
   1230      * user defined styles having priority
   1231      *
   1232      * @param string  The style to make the numbers
   1233      * @param boolean Whether to merge the new styles with the old or just
   1234      *                to overwrite them
   1235      * @param int     Tells the group of numbers for which style should be set.
   1236      * @since 1.0.0
   1237      */
   function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
       // Merge only when asked to AND the number group already has a style.
       // Appending (.=) to a missing array key raises PHP's undefined
       // index/key diagnostic, so a group without a style is assigned directly.
       if ($preserve_defaults && isset($this->language_data['STYLES']['NUMBERS'][$group])) {
           $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
       } else {
           $this->language_data['STYLES']['NUMBERS'][$group] = $style;
       }
   }
   1245 
   1246     /**
   1247      * Turns highlighting on/off for numbers
   1248      *
   1249      * @param boolean Whether to turn highlighting for numbers on or off
   1250      * @since 1.0.0
   1251      */
   function set_numbers_highlighting($flag) {
       // Store a real boolean as the number permission
       $this->lexic_permissions['NUMBERS'] = (bool) $flag;
   }
   1255 
   1256     /**
   1257      * Sets the styles for methods. $key is a number that references the
   1258      * appropriate "object splitter" - see the language file for the language
   1259      * you are highlighting to get this number. If $preserve_defaults is
   1260      * true, then styles are merged with the default styles, with the
   1261      * user defined styles having priority
   1262      *
   1263      * @param int     The key of the object splitter to change the styles of
   1264      * @param string  The style to make the methods
   1265      * @param boolean Whether to merge the new styles with the old or just
   1266      *                to overwrite them
   1267      * @since 1.0.0
   1268      */
   function set_methods_style($key, $style, $preserve_defaults = false) {
       // Merge only when asked to AND this object splitter already has a
       // style. Appending (.=) to a missing array key raises PHP's undefined
       // index/key diagnostic, so a key without a style is assigned directly.
       if ($preserve_defaults && isset($this->language_data['STYLES']['METHODS'][$key])) {
           $this->language_data['STYLES']['METHODS'][$key] .= $style;
       } else {
           $this->language_data['STYLES']['METHODS'][$key] = $style;
       }
   }
   1276 
   1277     /**
   1278      * Turns highlighting on/off for methods
   1279      *
   1280      * @param boolean Whether to turn highlighting for methods on or off
   1281      * @since 1.0.0
   1282      */
   function set_methods_highlighting($flag) {
       // Store a real boolean as the method permission
       $this->lexic_permissions['METHODS'] = (bool) $flag;
   }
   1286 
   1287     /**
   1288      * Sets the styles for regexps. If $preserve_defaults is
   1289      * true, then styles are merged with the default styles, with the
   1290      * user defined styles having priority
   1291      *
   1292      * @param string  The style to make the regular expression matches
   1293      * @param boolean Whether to merge the new styles with the old or just
   1294      *                to overwrite them
   1295      * @since 1.0.0
   1296      */
   function set_regexps_style($key, $style, $preserve_defaults = false) {
       // $key selects the regular-expression group whose style is changed.
       // Merge only when asked to AND that group already has a style.
       // Appending (.=) to a missing array key raises PHP's undefined
       // index/key diagnostic, so a group without a style is assigned directly.
       if ($preserve_defaults && isset($this->language_data['STYLES']['REGEXPS'][$key])) {
           $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
       } else {
           $this->language_data['STYLES']['REGEXPS'][$key] = $style;
       }
   }
   1304 
   1305     /**
   1306      * Turns highlighting on/off for regexps
   1307      *
   1308      * @param int     The key of the regular expression group to turn on or off
   1309      * @param boolean Whether to turn highlighting for the regular expression group on or off
   1310      * @since 1.0.0
   1311      */
   function set_regexps_highlighting($key, $flag) {
       // Store a real boolean as the permission of this regexp group
       $this->lexic_permissions['REGEXPS'][$key] = (bool) $flag;
   }
   1315 
   1316     /**
   1317      * Sets whether a set of keywords are checked for in a case sensitive manner
   1318      *
   1319      * @param int The key of the keyword group to change the case sensitivity of
   1320      * @param boolean Whether to check in a case sensitive manner or not
   1321      * @since 1.0.0
   1322      */
   function set_case_sensitivity($key, $case) {
       // Store a real boolean as the case-sensitivity of this keyword group
       $this->language_data['CASE_SENSITIVE'][$key] = (bool) $case;
   }
   1326 
   1327     /**
   1328      * Sets the case that keywords should use when found. Use the constants:
   1329      *
   1330      *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
   1331      *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
   1332      *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
   1333      *
   1334      * @param int A constant specifying what to do with matched keywords
   1335      * @since 1.0.1
   1336      */
   function set_case_keywords($case) {
       // The three keyword-case modes accepted by this setter
       $modes = array(GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER);

       // Anything else is ignored without recording an error
       if (!in_array($case, $modes)) {
           return;
       }

       $this->language_data['CASE_KEYWORDS'] = $case;
   }
   1343 
   1344     /**
   1345      * Sets how many spaces a tab is substituted for
   1346      *
   1347      * Widths below one are replaced by the default width of 8
   1348      *
   1349      * @param int The tab width
   1350      * @since 1.0.0
   1351      */
   function set_tab_width($width) {
       $requested = intval($width);

       // A width below one does not fit the constraints: use the default of 8
       $this->tab_width = ($requested < 1) ? 8 : $requested;
   }
   1361 
   1362     /**
   1363      * Sets whether or not to use tab-stop width specified by language
   1364      *
   1365      * @param boolean Whether to use language-specific tab-stop widths
   1366      * @since 1.0.7.20
   1367      */
   function set_use_language_tab_width($use) {
       // Normalise any truthy/falsy value to a real boolean before storing
       $this->use_language_tab_width = ($use) ? true : false;
   }
   1371 
   1372     /**
   1373      * Returns the tab width to use, based on the current language and user
   1374      * preference
   1375      *
   1376      * @return int Tab width
   1377      * @since 1.0.7.20
   1378      */
   function get_real_tab_width() {
       // The language's own width wins only when the user opted in and the
       // loaded language data actually defines one.
       if ($this->use_language_tab_width && isset($this->language_data['TAB_WIDTH'])) {
           return $this->language_data['TAB_WIDTH'];
       }

       // Otherwise fall back to the user-configured width
       return $this->tab_width;
   }
   1387 
   1388     /**
   1389      * Enables/disables strict highlighting. Default is off, calling this
   1390      * method without parameters will turn it on. See documentation
   1391      * for more details on strict mode and where to use it.
   1392      *
   1393      * @param boolean Whether to enable strict mode or not
   1394      * @since 1.0.0
   1395      */
   function enable_strict_mode($mode = true) {
       // Strict mode can only be toggled when the language data marks it
       // as GESHI_MAYBE; any other setting leaves strict_mode untouched.
       if (GESHI_MAYBE != $this->language_data['STRICT_MODE_APPLIES']) {
           return;
       }

       if ($mode) {
           $this->strict_mode = GESHI_ALWAYS;
       } else {
           $this->strict_mode = GESHI_NEVER;
       }
   }
   1401 
   1402     /**
   1403      * Disables all highlighting
   1404      *
   1405      * @since 1.0.0
   1406      * @todo  Rewrite with array traversal
   1407      * @deprecated In favour of enable_highlighting
   1408      */
   function disable_highlighting() {
       // Thin wrapper: delegate to enable_highlighting() with the flag off
       $enabled = false;
       $this->enable_highlighting($enabled);
   }
   1412 
   1413     /**
   1414      * Enables all highlighting
   1415      *
   1416      * The optional flag parameter was added in version 1.0.7.21 and can be used
   1417      * to enable (true) or disable (false) all highlighting.
   1418      *
   1419      * @since 1.0.0
   1420      * @param boolean A flag specifying whether to enable or disable all highlighting
   1421      * @todo  Rewrite with array traversal
   1422      */
   function enable_highlighting($flag = true) {
       $enabled = (bool) $flag;

       foreach ($this->lexic_permissions as $category => $permission) {
           // Scalar permission: one switch for the whole category
           if (!is_array($permission)) {
               $this->lexic_permissions[$category] = $enabled;
               continue;
           }

           // Array permission: flip every group of the category
           foreach ($permission as $group => $unused) {
               $this->lexic_permissions[$category][$group] = $enabled;
           }
       }

       // Context blocks follow the same switch
       $this->enable_important_blocks = $enabled;
   }
   1438 
   1439     /**
   1440      * Given a file extension, this method returns either a valid geshi language
   1441      * name, or 'text' if no language lists that extension
   1442      *
   1443      * @param string The extension to get a language name for
   1444      * @param array  A lookup array to use instead of the default one
   1445      * @since 1.0.5
   1446      * @todo Re-think about how this method works (maybe make it private and/or make it
   1447      *       a extension->lang lookup?)
   1448      * @todo static?
   1449      */
   function get_language_name_from_extension( $extension, $lookup = array() ) {
       // Matching is done on the lower-cased extension
       $wanted = strtolower($extension);

       // A non-empty array supplied by the caller replaces the built-in table
       $table = (is_array($lookup) && !empty($lookup)) ? $lookup : array(
           '6502acme' => array( 'a', 's', 'asm', 'inc' ),
           '6502tasm' => array( 'a', 's', 'asm', 'inc' ),
           '6502kickass' => array( 'a', 's', 'asm', 'inc' ),
           '68000devpac' => array( 'a', 's', 'asm', 'inc' ),
           'abap' => array('abap'),
           'actionscript' => array('as'),
           'ada' => array('a', 'ada', 'adb', 'ads'),
           'apache' => array('conf'),
           'asm' => array('ash', 'asm', 'inc'),
           'asp' => array('asp'),
           'bash' => array('sh'),
           'bf' => array('bf'),
           'c' => array('c', 'h'),
           'c_mac' => array('c', 'h'),
           'caddcl' => array(),
           'cadlisp' => array(),
           'cdfg' => array('cdfg'),
           'cobol' => array('cbl'),
           'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
           'csharp' => array('cs'),
           'css' => array('css'),
           'd' => array('d'),
           'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
           'diff' => array('diff', 'patch'),
           'dos' => array('bat', 'cmd'),
           'gdb' => array('kcrash', 'crash', 'bt'),
           'gettext' => array('po', 'pot'),
           'gml' => array('gml'),
           'gnuplot' => array('plt'),
           'groovy' => array('groovy'),
           'haskell' => array('hs'),
           'haxe' => array('hx'),
           'html4strict' => array('html', 'htm'),
           'ini' => array('ini', 'desktop'),
           'java' => array('java'),
           'javascript' => array('js'),
           'klonec' => array('kl1'),
           'klonecpp' => array('klx'),
           'latex' => array('tex'),
           'lisp' => array('lisp'),
           'lua' => array('lua'),
           'matlab' => array('m'),
           'mpasm' => array(),
           'mysql' => array('sql'),
           'nsis' => array(),
           'objc' => array(),
           'oobas' => array(),
           'oracle8' => array(),
           'oracle10' => array(),
           'pascal' => array('pas'),
           'perl' => array('pl', 'pm'),
           'php' => array('php', 'php5', 'phtml', 'phps'),
           'povray' => array('pov'),
           'providex' => array('pvc', 'pvx'),
           'prolog' => array('pl'),
           'python' => array('py'),
           'qbasic' => array('bi'),
           'reg' => array('reg'),
           'ruby' => array('rb'),
           'sas' => array('sas'),
           'scala' => array('scala'),
           'scheme' => array('scm'),
           'scilab' => array('sci'),
           'smalltalk' => array('st'),
           'smarty' => array(),
           'tcl' => array('tcl'),
           'text' => array('txt'),
           'vb' => array('bas'),
           'vbnet' => array(),
           'visualfoxpro' => array(),
           'whitespace' => array('ws'),
           'xml' => array('xml', 'svg', 'xrc'),
           'z80' => array('z80', 'asm', 'inc')
       );

       // The first language (in table order) that lists the extension wins
       foreach ($table as $lang => $extensions) {
           if (!in_array($wanted, $extensions)) {
               continue;
           }
           return $lang;
       }

       // No language claims this extension: fall back to plain text
       return 'text';
   }
   1539 
   1540     /**
   1541      * Given a file name, this method loads its contents in, and attempts
   1542      * to set the language automatically. An optional lookup table can be
   1543      * passed for looking up the language name. If not specified a default
   1544      * table is used
   1545      *
   1546      * The language table is in the form
   1547      * <pre>array(
   1548      *   'lang_name' => array('extension', 'extension', ...),
   1549      *   'lang_name' ...
   1550      * );</pre>
   1551      *
   1552      * @param string The filename to load the source from
   1553      * @param array  A lookup array to use instead of the default one
   1554      * @todo Complete rethink of this and above method
   1555      * @since 1.0.5
   1556      */
   function load_from_file($file_name, $lookup = array()) {
       // Only regular, readable files can be loaded. is_readable() on its own
       // is also true for directories, which file_get_contents() cannot read.
       if (!is_file($file_name) || !is_readable($file_name)) {
           $this->error = GESHI_ERROR_FILE_NOT_READABLE;
           return;
       }

       $this->set_source(file_get_contents($file_name));

       // pathinfo() inspects the last path component only, so a dot inside a
       // directory name (e.g. "/srv/app.d/README") is no longer mistaken for
       // the start of an extension; files without an extension yield ''.
       $extension = (string) pathinfo($file_name, PATHINFO_EXTENSION);
       $this->set_language($this->get_language_name_from_extension($extension, $lookup));
   }
   1565 
   1566     /**
   1567      * Adds a keyword to a keyword group for highlighting
   1568      *
   1569      * @param int    The key of the keyword group to add the keyword to
   1570      * @param string The word to add to the keyword group
   1571      * @since 1.0.0
   1572      */
   function add_keyword($key, $word) {
       // Create the group on demand. isset() is checked first so that a new
       // key does not trigger an undefined-index notice.
       if (!isset($this->language_data['KEYWORDS'][$key]) ||
               !is_array($this->language_data['KEYWORDS'][$key])) {
           $this->language_data['KEYWORDS'][$key] = array();
       }

       if (in_array($word, $this->language_data['KEYWORDS'][$key])) {
           // Keyword is already part of the group
           return;
       }
       $this->language_data['KEYWORDS'][$key][] = $word;

       if (!$this->parse_cache_built) {
           // Nothing cached yet, build_parse_cache() will pick the word up
           return;
       }

       if (isset($this->language_data['CACHED_KEYWORD_LISTS'][$key]) &&
               is_array($this->language_data['CACHED_KEYWORD_LISTS'][$key]) &&
               count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) > 0) {
           //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
           $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
           $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
       } elseif (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
               $this->lexic_permissions['KEYWORDS'][$key]) {
           // The group has no compiled list yet (it was created after the
           // cache was built), so there is nothing to append to. Compile it,
           // using the same permission test build_parse_cache() applies.
           $this->optimize_keyword_group($key);
       }
   }
   1587 
   1588     /**
   1589      * Removes a keyword from a keyword group
   1590      *
   1591      * @param int    The key of the keyword group to remove the keyword from
   1592      * @param string The word to remove from the keyword group
   1593      * @param bool   Whether to automatically recompile the optimized regexp list or not.
   1594      *               Note: if you set this to false and @see GeSHi->parse_code() was already called once,
   1595      *               for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
   1596      *               or the removed keyword will stay in cache and still be highlighted! On the other hand
   1597      *               it might be too expensive to recompile the regexp list for every removal if you want to
   1598      *               remove a lot of keywords.
   1599      * @since 1.0.0
   1600      */
   function remove_keyword($key, $word, $recompile = true) {
       // An unknown group has nothing to remove. Without this guard
       // array_search() would be handed an undefined value, which warns on
       // older PHP versions and throws a TypeError on PHP 8.
       if (!isset($this->language_data['KEYWORDS'][$key]) ||
               !is_array($this->language_data['KEYWORDS'][$key])) {
           return;
       }

       $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key]);
       if ($key_to_remove === false) {
           // The word is not part of this group
           return;
       }
       unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);

       //NEW in 1.0.8, optionally recompile keyword group
       if ($recompile && $this->parse_cache_built) {
           $this->optimize_keyword_group($key);
       }
   }
   1612 
   1613     /**
   1614      * Creates a new keyword group
   1615      *
   1616      * @param int    The key of the keyword group to create
   1617      * @param string The styles for the keyword group
   1618      * @param boolean Whether the keyword group is case sensitive or not
   1619      * @param array  The words to use for the keyword group
   1620      * @since 1.0.0
   1621      */
   function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
       // Accept a single word as well as a list of words
       $word_list = (array) $words;
       if (count($word_list) == 0) {
           // A group without words would break highlighting, so refuse it
           return false;
       }

       // Register words, style, case handling and permission for the group
       $this->language_data['KEYWORDS'][$key] = $word_list;
       $this->lexic_permissions['KEYWORDS'][$key] = true;
       $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
       $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;

       // Once the parse cache exists the new group has to be compiled right away
       if ($this->parse_cache_built) {
           $this->optimize_keyword_group($key);
       }
   }
   1640 
   1641     /**
   1642      * Removes a keyword group
   1643      *
   1644      * @param int    The key of the keyword group to remove
   1645      * @since 1.0.0
   1646      */
   function remove_keyword_group ($key) {
       // Drop every per-group record: the word list, the permission flag, the
       // case-sensitivity flag, the style and (since 1.0.8) the compiled
       // regexp list.
       unset(
           $this->language_data['KEYWORDS'][$key],
           $this->lexic_permissions['KEYWORDS'][$key],
           $this->language_data['CASE_SENSITIVE'][$key],
           $this->language_data['STYLES']['KEYWORDS'][$key],
           $this->language_data['CACHED_KEYWORD_LISTS'][$key]
       );
   }
   1657 
   1658     /**
   1659      * compile optimized regexp list for keyword group
   1660      *
   1661      * @param int   The key of the keyword group to compile & optimize
   1662      * @since 1.0.8
   1663      */
   function optimize_keyword_group($key) {
       // Compile the word list of the group into its optimized regexp parts
       $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
           $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);

       // SPACE_AS_WHITESPACE may be configured for all keyword groups and
       // overridden for a single group; the per-group value wins.
       $space_as_whitespace = false;
       if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
           $space_as_whitespace =
               $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
       }
       if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
           $space_as_whitespace =
               $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
       }

       if (!$space_as_whitespace) {
           return;
       }

       // Let a literal blank inside a keyword match any run of whitespace
       foreach ($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $index => $pattern) {
           $this->language_data['CACHED_KEYWORD_LISTS'][$key][$index] =
               str_replace(" ", "\\s+", $pattern);
       }
   }
   1687 
   1688     /**
   1689      * Sets the content of the header block
   1690      *
   1691      * @param string The content of the header block
   1692      * @since 1.0.2
   1693      */
   function set_header_content($content)
   {
       // Stored as given
       $this->header_content = $content;
   }
   1697 
   1698     /**
   1699      * Sets the content of the footer block
   1700      *
   1701      * @param string The content of the footer block
   1702      * @since 1.0.2
   1703      */
   function set_footer_content($content)
   {
       // Stored as given
       $this->footer_content = $content;
   }
   1707 
   1708     /**
   1709      * Sets the style for the header content
   1710      *
   1711      * @param string The style for the header content
   1712      * @since 1.0.2
   1713      */
   function set_header_content_style($style)
   {
       // Style string for the header block, stored as given
       $this->header_content_style = $style;
   }
   1717 
   1718     /**
   1719      * Sets the style for the footer content
   1720      *
   1721      * @param string The style for the footer content
   1722      * @since 1.0.2
   1723      */
   function set_footer_content_style($style)
   {
       // Style string for the footer block, stored as given
       $this->footer_content_style = $style;
   }
   1727 
   1728     /**
   1729      * Sets whether to force a surrounding block around
   1730      * the highlighted code or not
   1731      *
   1732      * @param boolean Tells whether to enable or disable this feature
   1733      * @since 1.0.7.20
   1734      */
   function enable_inner_code_block($flag)
   {
       // Normalise any truthy/falsy input to a real boolean
       $this->force_code_block = $flag ? true : false;
   }
   1738 
   1739     /**
   1740      * Sets the base URL to be used for keywords
   1741      *
   1742      * @param int The key of the keyword group to set the URL for
   1743      * @param string The URL to set for the group. If {FNAME} is in
   1744      *               the url somewhere, it is replaced by the keyword
   1745      *               that the URL is being made for
   1746      * @since 1.0.2
   1747      */
   function set_url_for_keyword_group($group, $url)
   {
       // One URL template per keyword group, indexed by the group key
       $this->language_data['URLS'][$group] = $url;
   }
   1751 
   1752     /**
   1753      * Sets styles for links in code
   1754      *
   1755      * @param int A constant that specifies what state the style is being
   1756      *            set for - e.g. :hover or :visited
   1757      * @param string The styles to use for that state
   1758      * @since 1.0.2
   1759      */
   function set_link_styles($type, $styles)
   {
       // Styles are kept per link state, indexed by the state constant
       $this->link_styles[$type] = $styles;
   }
   1763 
   1764     /**
   1765      * Sets the target for links in code
   1766      *
   1767      * @param string The target for links in the code, e.g. _blank
   1768      * @since 1.0.3
   1769      */
   function set_link_target($target)
   {
       // An empty target clears the attribute; otherwise the complete
       // attribute string (leading space included) is stored.
       $this->link_target = $target ? ' target="' . $target . '"' : '';
   }
   1777 
   1778     /**
   1779      * Sets styles for important parts of the code
   1780      *
   1781      * @param string The styles to use on important parts of the code
   1782      * @since 1.0.2
   1783      */
   function set_important_styles($styles)
   {
       // Stored as given
       $this->important_styles = $styles;
   }
   1787 
   1788     /**
   1789      * Sets whether context-important blocks are highlighted
   1790      *
   1791      * @param boolean Tells whether to enable or disable highlighting of important blocks
   1792      * @todo REMOVE THIS SHIZ FROM GESHI!
   1793      * @deprecated
   1794      * @since 1.0.2
   1795      */
   function enable_important_blocks($flag)
   {
       // Normalise any truthy/falsy input to a real boolean
       $this->enable_important_blocks = (bool) $flag;
   }
   1799 
   1800     /**
   1801      * Whether CSS IDs should be added to each line
   1802      *
   1803      * @param boolean If true, IDs will be added to each line.
   1804      * @since 1.0.2
   1805      */
   function enable_ids($flag = true)
   {
       // Normalise any truthy/falsy input to a real boolean
       $this->add_ids = (bool) $flag;
   }
   1809 
   1810     /**
   1811      * Specifies which lines to highlight extra
   1812      *
   1813      * The extra style parameter was added in 1.0.7.21.
   1814      *
   1815      * @param mixed An array of line numbers to highlight, or just a line
   1816      *              number on its own.
   1817      * @param string A string specifying the style to use for this line.
   1818      *              If null is specified, the default style is used.
   1819      *              If false is specified, the line will be removed from
   1820      *              special highlighting
   1821      * @since 1.0.2
   1822      * @todo  Some data replication here that could be cut down on
   1823      */
   function highlight_lines_extra($lines, $style = null) {
       if (is_array($lines)) {
           // Handle a list by processing one line number at a time
           foreach ($lines as $single_line) {
               $this->highlight_lines_extra($single_line, $style);
           }
           return;
       }

       // Register the line for extra highlighting
       $line_number = intval($lines);
       $this->highlight_extra_lines[$line_number] = $line_number;

       if ($style === false) {
           // false: take the line out of extra highlighting altogether
           unset($this->highlight_extra_lines[$line_number]);
           unset($this->highlight_extra_lines_styles[$line_number]);
       } elseif ($style === null) {
           // null: no individual style is kept for this line
           unset($this->highlight_extra_lines_styles[$line_number]);
       } else {
           // Anything else is the individual style for this line
           $this->highlight_extra_lines_styles[$line_number] = $style;
       }
   }
   1846 
   1847     /**
   1848      * Sets the style for extra-highlighted lines
   1849      *
   1850      * @param string The style for extra-highlighted lines
   1851      * @since 1.0.2
   1852      */
   function set_highlight_lines_extra_style($styles)
   {
       // Style for extra-highlighted lines, stored as given
       $this->highlight_extra_lines_style = $styles;
   }
   1856 
   1857     /**
   1858      * Sets the line-ending
   1859      *
   1860      * @param string The new line-ending
   1861      * @since 1.0.2
   1862      */
   function set_line_ending($line_ending)
   {
       // Always keep the line ending as a string
       $this->line_ending = strval($line_ending);
   }
   1866 
   1867     /**
   1868      * Sets what number line numbers should start at. Should
   1869      * be a positive integer, and will be converted to one.
   1870      *
   1871      * <b>Warning:</b> Using this method will add the "start"
   1872      * attribute to the &lt;ol&gt; that is used for line numbering.
   1873      * This is <b>not</b> valid XHTML strict, so if that's what you
   1874      * care about then don't use this method. Firefox is getting
   1875      * support for the CSS method of doing this in 1.1 and Opera
   1876      * has support for the CSS method, but (of course) IE doesn't
   1877      * so it's not worth doing it the CSS way yet.
   1878      *
   1879      * @param int The number to start line numbers at
   1880      * @since 1.0.2
   1881      */
   function start_line_numbers_at($number)
   {
       // Coerce to an integer first, then drop the sign
       $as_integer = intval($number);
       $this->line_numbers_start = abs($as_integer);
   }
   1885 
   1886     /**
   1887      * Sets the encoding used for htmlspecialchars(), for international
   1888      * support.
   1889      *
   1890      * NOTE: This is not needed for now because htmlspecialchars() is not
   1891      * being used (it has a security hole in PHP4 that has not been patched).
   1892      * Maybe in a future version it may make a return for speed reasons, but
   1893      * I doubt it.
   1894      *
   1895      * @param string The encoding to use for the source
   1896      * @since 1.0.3
   1897      */
   function set_encoding($encoding)
   {
       // An empty value leaves the current encoding untouched
       if (!$encoding) {
           return;
       }

       // Encoding names are stored in lower case
       $this->encoding = strtolower($encoding);
   }
   1903 
   1904     /**
   1905      * Turns linking of keywords on or off.
   1906      *
   1907      * @param boolean If true, links will be added to keywords
   1908      * @since 1.0.2
   1909      */
   function enable_keyword_links($enable = true)
   {
       // Normalise any truthy/falsy input to a real boolean
       $this->keyword_links = $enable ? true : false;
   }
   1913 
   1914     /**
   1915      * Setup caches needed for styling. This is automatically called in
   1916      * parse_code() and get_stylesheet() when appropriate. This function helps
   1917      * stylesheet generators as they rely on some style information being
   1918      * preprocessed
   1919      *
   1920      * @since 1.0.8
   1921      * @access private
   1922      */
   function build_style_cache() {
       // Only number highlighting needs a style cache
       if (!$this->lexic_permissions['NUMBERS']) {
           return;
       }

       // A language file without a NUMBERS entry is treated like NUMBERS = 0
       if (!isset($this->language_data['NUMBERS'])) {
           $this->language_data['NUMBERS'] = 0;
       }

       // An array is taken over as the cache without any processing
       if (is_array($this->language_data['NUMBERS'])) {
           $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
           return;
       }

       // Otherwise NUMBERS is a bitfield of GESHI_NUMBER_* flags
       $this->language_data['NUMBERS_CACHE'] = array();
       if (!$this->language_data['NUMBERS']) {
           // Nothing selected: fall back to basic integers and plain floats
           $this->language_data['NUMBERS'] =
               GESHI_NUMBER_INT_BASIC |
               GESHI_NUMBER_FLT_NONSCI;
       }

       // Walk the bitfield from the lowest bit upwards
       $bit = 0;
       $remaining = $this->language_data['NUMBERS'];
       while ($remaining > 0) {
           $flag = 1 << $bit;

           // A style stored under the flag value is moved to the bit position
           if (isset($this->language_data['STYLES']['NUMBERS'][$flag])) {
               $this->language_data['STYLES']['NUMBERS'][$bit] =
                   $this->language_data['STYLES']['NUMBERS'][$flag];
               unset($this->language_data['STYLES']['NUMBERS'][$flag]);
           }

           if ($remaining & 1) {
               if (isset($this->language_data['STYLES']['NUMBERS'][$bit])) {
                   // The flag has a style of its own, so it gets its own entry
                   $this->language_data['NUMBERS_CACHE'][$bit] = $flag;
               } else {
                   // Flags without a style of their own are collected in group 0
                   if (!isset($this->language_data['NUMBERS_CACHE'][0])) {
                       $this->language_data['NUMBERS_CACHE'][0] = 0;
                   }
                   $this->language_data['NUMBERS_CACHE'][0] |= $flag;
               }
           }

           ++$bit;
           $remaining >>= 1;
       }
   }
   1966 
   1967     /**
   1968      * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
   1969      * This function makes stylesheet generators much faster as they do not need these caches.
   1970      *
   1971      * @since 1.0.8
   1972      * @access private
   1973      */
   function build_parse_cache() {
       // Symbols: building the search expression is costly, so it is done
       // once for all symbols instead of once per symbol group. SYMBOL_DATA
       // maps each (escaped) symbol to the group it is styled with.
       if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
           $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;

           $this->language_data['SYMBOL_DATA'] = array();
           $multi_char = array();  // quoted symbols longer than one char
           $single_char = array(); // quoted one-char symbols (go into a char class)
           foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
               // An array entry is a symbol group of its own, keyed by $key;
               // a plain entry is a single symbol that belongs to group 0.
               if (is_array($symbols)) {
                   $group = $key;
                   $symbol_list = $symbols;
               } else {
                   $group = 0;
                   $symbol_list = array($symbols);
               }

               foreach ($symbol_list as $sym) {
                   $sym = $this->hsc($sym);
                   if (isset($this->language_data['SYMBOL_DATA'][$sym])) {
                       // First definition of a symbol wins
                       continue;
                   }
                   $this->language_data['SYMBOL_DATA'][$sym] = $group;

                   if (isset($sym[1])) { // multiple chars
                       $multi_char[] = preg_quote($sym, '/');
                   } elseif ($sym == '-') {
                       // don't trigger range out of order error
                       $single_char[] = '\-';
                   } else { // single char
                       $single_char[] = preg_quote($sym, '/');
                   }
               }
           }

           // Combine both lists into one search expression: the multi-char
           // alternatives first, then a character class of the single chars.
           $alternatives = array();
           if (!empty($multi_char)) {
               rsort($multi_char);
               $alternatives[] = implode('|', $multi_char);
           }
           if (!empty($single_char)) {
               rsort($single_char);
               $alternatives[] = '[' . implode('', $single_char) . ']';
           }
           $this->language_data['SYMBOL_SEARCH'] = implode("|", $alternatives);
       }

       // Keywords: throw away the old cache and compile every group that is
       // not explicitly disabled.
       $this->language_data['CACHED_KEYWORD_LISTS'] = array();
       foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
           if (isset($this->lexic_permissions['KEYWORDS'][$key]) &&
                   !$this->lexic_permissions['KEYWORDS'][$key]) {
               continue;
           }
           $this->optimize_keyword_group($key);
       }

       // Brackets: one replacement per bracket character, in the same order
       // as the match list.
       if ($this->lexic_permissions['BRACKETS']) {
           $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');

           if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
               // Inline style
               $bracket_open = '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">';
           } else {
               // CSS class
               $bracket_open = '<| class="br0">';
           }

           $bracket_replace = array();
           foreach (array('&#91;', '&#93;', '&#40;', '&#41;', '&#123;', '&#125;') as $entity) {
               $bracket_replace[] = $bracket_open . $entity . '|>';
           }
           $this->language_data['CACHE_BRACKET_REPLACE'] = $bracket_replace;
       }

       //Build the parse cache needed to highlight numbers appropriate
       if ($this->lexic_permissions['NUMBERS']) {
           // The number style cache is a prerequisite; build it if missing
           if (!isset($this->language_data['NUMBERS_CACHE'])) {
               $this->build_style_cache();
           }

           //Number format specification
           //All this formats are matched case-insensitively!
           static $numbers_format = array(
               GESHI_NUMBER_INT_BASIC =>
                   '(?:(?<![0-9a-z_\.%$@])|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
               GESHI_NUMBER_INT_CSTYLE =>
                   '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
               GESHI_NUMBER_BIN_SUFFIX =>
                   '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
               GESHI_NUMBER_BIN_PREFIX_PERCENT =>
                   '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
               GESHI_NUMBER_BIN_PREFIX_0B =>
                   '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
               GESHI_NUMBER_OCT_PREFIX =>
                   '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
               GESHI_NUMBER_OCT_PREFIX_0O =>
                   '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
               GESHI_NUMBER_OCT_PREFIX_AT =>
                   '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])\@[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
               GESHI_NUMBER_OCT_SUFFIX =>
                   '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
               GESHI_NUMBER_HEX_PREFIX =>
                   '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
               GESHI_NUMBER_HEX_PREFIX_DOLLAR =>
                   '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\$[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
               GESHI_NUMBER_HEX_SUFFIX =>
                   '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
               GESHI_NUMBER_FLT_NONSCI =>
                   '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
               GESHI_NUMBER_FLT_NONSCI_F =>
                   '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
               GESHI_NUMBER_FLT_SCI_SHORT =>
                   '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
               GESHI_NUMBER_FLT_SCI_ZERO =>
                   '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
               );

           // Each NUMBERS_CACHE entry is either a ready-made regexp string
           // or a bitfield of number flags for one style group.
           $this->language_data['NUMBERS_RXCACHE'] = array();
           foreach ($this->language_data['NUMBERS_CACHE'] as $style_key => $spec) {
               if (is_string($spec)) {
                   $regexp = $spec;
               } else {
                   // Collect the format of every flag set in the bitfield
                   // and join them into one alternation
                   $formats = array();
                   for ($flag = 1; $flag <= $spec; $flag <<= 1) {
                       if ($spec & $flag) {
                           $formats[] = $numbers_format[$flag];
                       }
                   }
                   $regexp = implode("|", $formats);
               }

               $this->language_data['NUMBERS_RXCACHE'][$style_key] =
                   "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|(?>[^\<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i"; //
           }

           // Default pre-check expression unless the language supplies one
           if (!isset($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'])) {
               $this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'] = '#\d#';
           }
       }

       $this->parse_cache_built = true;
   }
   2145 
   2146     /**
   2147      * Returns the code in $this->source, highlighted and surrounded by the
   2148      * necessary HTML.
   2149      *
   2150      * This should only be called ONCE, cos it's SLOW! If you want to highlight
   2151      * the same source multiple times, you're better off doing a whole lot of
   2152      * str_replaces to replace the &lt;span&gt;s
   2153      *
   2154      * @since 1.0.0
   2155      */
   2156     function parse_code () {
   2157         // Start the timer
   2158         $start_time = microtime();
   2159 
   2160         // Replace all newlines to a common form.
   2161         $code = str_replace("\r\n", "\n", $this->source);
   2162         $code = str_replace("\r", "\n", $code);
   2163 
   2164         // Firstly, if there is an error, we won't highlight
   2165         if ($this->error) {
   2166             //Escape the source for output
   2167             $result = $this->hsc($this->source);
   2168 
   2169             //This fix is related to SF#1923020, but has to be applied regardless of
   2170             //actually highlighting symbols.
   2171             $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
   2172 
   2173             // Timing is irrelevant
   2174             $this->set_time($start_time, $start_time);
   2175             $this->finalise($result);
   2176             return $result;
   2177         }
   2178 
   2179         // make sure the parse cache is up2date
   2180         if (!$this->parse_cache_built) {
   2181             $this->build_parse_cache();
   2182         }
   2183 
   2184         // Initialise various stuff
   2185         $length           = strlen($code);
   2186         $COMMENT_MATCHED  = false;
   2187         $stuff_to_parse   = '';
   2188         $endresult        = '';
   2189 
   2190         // "Important" selections are handled like multiline comments
   2191         // @todo GET RID OF THIS SHIZ
   2192         if ($this->enable_important_blocks) {
   2193             $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
   2194         }
   2195 
   2196         if ($this->strict_mode) {
   2197             // Break the source into bits. Each bit will be a portion of the code
   2198             // within script delimiters - for example, HTML between < and >
   2199             $k = 0;
   2200             $parts = array();
   2201             $matches = array();
   2202             $next_match_pointer = null;
   2203             // we use a copy to unset delimiters on demand (when they are not found)
   2204             $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
   2205             $i = 0;
   2206             while ($i < $length) {
   2207                 $next_match_pos = $length + 1; // never true
   2208                 foreach ($delim_copy as $dk => $delimiters) {
   2209                     if(is_array($delimiters)) {
   2210                         foreach ($delimiters as $open => $close) {
   2211                             // make sure the cache is setup properly
   2212                             if (!isset($matches[$dk][$open])) {
   2213                                 $matches[$dk][$open] = array(
   2214                                     'next_match' => -1,
   2215                                     'dk' => $dk,
   2216 
   2217                                     'open' => $open, // needed for grouping of adjacent code blocks (see below)
   2218                                     'open_strlen' => strlen($open),
   2219 
   2220                                     'close' => $close,
   2221                                     'close_strlen' => strlen($close),
   2222                                 );
   2223                             }
   2224                             // Get the next little bit for this opening string
   2225                             if ($matches[$dk][$open]['next_match'] < $i) {
   2226                                 // only find the next pos if it was not already cached
   2227                                 $open_pos = strpos($code, $open, $i);
   2228                                 if ($open_pos === false) {
   2229                                     // no match for this delimiter ever
   2230                                     unset($delim_copy[$dk][$open]);
   2231                                     continue;
   2232                                 }
   2233                                 $matches[$dk][$open]['next_match'] = $open_pos;
   2234                             }
   2235                             if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
   2236                                 //So we got a new match, update the close_pos
   2237                                 $matches[$dk][$open]['close_pos'] =
   2238                                     strpos($code, $close, $matches[$dk][$open]['next_match']+1);
   2239 
   2240                                 $next_match_pointer =& $matches[$dk][$open];
   2241                                 $next_match_pos = $matches[$dk][$open]['next_match'];
   2242                             }
   2243                         }
   2244                     } else {
   2245                         //So we should match an RegExp as Strict Block ...
   2246                         /**
   2247                          * The value in $delimiters is expected to be an RegExp
   2248                          * containing exactly 2 matching groups:
   2249                          *  - Group 1 is the opener
   2250                          *  - Group 2 is the closer
   2251                          */
   2252                         if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work.
   2253                             preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
   2254                             //We got a match ...
   2255                             if(isset($matches_rx['start']) && isset($matches_rx['end']))
   2256                             {
   2257                                 $matches[$dk] = array(
   2258                                     'next_match' => $matches_rx['start'][1],
   2259                                     'dk' => $dk,
   2260 
   2261                                     'close_strlen' => strlen($matches_rx['end'][0]),
   2262                                     'close_pos' => $matches_rx['end'][1],
   2263                                     );
   2264                             } else {
   2265                                 $matches[$dk] = array(
   2266                                     'next_match' => $matches_rx[1][1],
   2267                                     'dk' => $dk,
   2268 
   2269                                     'close_strlen' => strlen($matches_rx[2][0]),
   2270                                     'close_pos' => $matches_rx[2][1],
   2271                                     );
   2272                             }
   2273                         } else {
   2274                             // no match for this delimiter ever
   2275                             unset($delim_copy[$dk]);
   2276                             continue;
   2277                         }
   2278 
   2279                         if ($matches[$dk]['next_match'] <= $next_match_pos) {
   2280                             $next_match_pointer =& $matches[$dk];
   2281                             $next_match_pos = $matches[$dk]['next_match'];
   2282                         }
   2283                     }
   2284                 }
   2285 
   2286                 // non-highlightable text
   2287                 $parts[$k] = array(
   2288                     1 => substr($code, $i, $next_match_pos - $i)
   2289                 );
   2290                 ++$k;
   2291 
   2292                 if ($next_match_pos > $length) {
   2293                     // out of bounds means no next match was found
   2294                     break;
   2295                 }
   2296 
   2297                 // highlightable code
   2298                 $parts[$k][0] = $next_match_pointer['dk'];
   2299 
   2300                 //Only combine for non-rx script blocks
   2301                 if(is_array($delim_copy[$next_match_pointer['dk']])) {
   2302                     // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
   2303                     $i = $next_match_pos + $next_match_pointer['open_strlen'];
   2304                     while (true) {
   2305                         $close_pos = strpos($code, $next_match_pointer['close'], $i);
   2306                         if ($close_pos == false) {
   2307                             break;
   2308                         }
   2309                         $i = $close_pos + $next_match_pointer['close_strlen'];
   2310                         if ($i == $length) {
   2311                             break;
   2312                         }
   2313                         if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
   2314                             substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
   2315                             // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
   2316                             foreach ($matches as $submatches) {
   2317                                 foreach ($submatches as $match) {
   2318                                     if ($match['next_match'] == $i) {
   2319                                         // a different block already matches here!
   2320                                         break 3;
   2321                                     }
   2322                                 }
   2323                             }
   2324                         } else {
   2325                             break;
   2326                         }
   2327                     }
   2328                 } else {
   2329                     $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
   2330                     $i = $close_pos;
   2331                 }
   2332 
   2333                 if ($close_pos === false) {
   2334                     // no closing delimiter found!
   2335                     $parts[$k][1] = substr($code, $next_match_pos);
   2336                     ++$k;
   2337                     break;
   2338                 } else {
   2339                     $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
   2340                     ++$k;
   2341                 }
   2342             }
   2343             unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
   2344             $num_parts = $k;
   2345 
   2346             if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
   2347                 // when we have only one part, we don't have anything to highlight at all.
   2348                 // if we have a "maybe" strict language, this should be handled as highlightable code
   2349                 $parts = array(
   2350                     0 => array(
   2351                         0 => '',
   2352                         1 => ''
   2353                     ),
   2354                     1 => array(
   2355                         0 => null,
   2356                         1 => $parts[0][1]
   2357                     )
   2358                 );
   2359                 $num_parts = 2;
   2360             }
   2361 
   2362         } else {
   2363             // Not strict mode - simply dump the source into
   2364             // the array at index 1 (the first highlightable block)
   2365             $parts = array(
   2366                 0 => array(
   2367                     0 => '',
   2368                     1 => ''
   2369                 ),
   2370                 1 => array(
   2371                     0 => null,
   2372                     1 => $code
   2373                 )
   2374             );
   2375             $num_parts = 2;
   2376         }
   2377 
   2378         //Unset variables we won't need any longer
   2379         unset($code);
   2380 
   2381         //Preload some repeatedly used values regarding hardquotes ...
   2382         $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
   2383         $hq_strlen = strlen($hq);
   2384 
   2385         //Preload if line numbers are to be generated afterwards
   2386         //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
   2387         $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
   2388             !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
   2389 
   2390         //preload the escape char for faster checking ...
   2391         $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
   2392 
   2393         // this is used for single-line comments
   2394         $sc_disallowed_before = "";
   2395         $sc_disallowed_after = "";
   2396 
   2397         if (isset($this->language_data['PARSER_CONTROL'])) {
   2398             if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
   2399                 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
   2400                     $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
   2401                 }
   2402                 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
   2403                     $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
   2404                 }
   2405             }
   2406         }
   2407 
   2408         //Fix for SF#1932083: Multichar Quotemarks unsupported
   2409         $is_string_starter = array();
   2410         if ($this->lexic_permissions['STRINGS']) {
   2411             foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
   2412                 if (!isset($is_string_starter[$quotemark[0]])) {
   2413                     $is_string_starter[$quotemark[0]] = (string)$quotemark;
   2414                 } elseif (is_string($is_string_starter[$quotemark[0]])) {
   2415                     $is_string_starter[$quotemark[0]] = array(
   2416                         $is_string_starter[$quotemark[0]],
   2417                         $quotemark);
   2418                 } else {
   2419                     $is_string_starter[$quotemark[0]][] = $quotemark;
   2420                 }
   2421             }
   2422         }
   2423 
   2424         // Now we go through each part. We know that even-indexed parts are
   2425         // code that shouldn't be highlighted, and odd-indexed parts should
   2426         // be highlighted
   2427         for ($key = 0; $key < $num_parts; ++$key) {
   2428             $STRICTATTRS = '';
   2429 
   2430             // If this block should be highlighted...
   2431             if (!($key & 1)) {
   2432                 // Else not a block to highlight
   2433                 $endresult .= $this->hsc($parts[$key][1]);
   2434                 unset($parts[$key]);
   2435                 continue;
   2436             }
   2437 
   2438             $result = '';
   2439             $part = $parts[$key][1];
   2440 
   2441             $highlight_part = true;
   2442             if ($this->strict_mode && !is_null($parts[$key][0])) {
   2443                 // get the class key for this block of code
   2444                 $script_key = $parts[$key][0];
   2445                 $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
   2446                 if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
   2447                     $this->lexic_permissions['SCRIPT']) {
   2448                     // Add a span element around the source to
   2449                     // highlight the overall source block
   2450                     if (!$this->use_classes &&
   2451                         $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
   2452                         $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
   2453                     } else {
   2454                         $attributes = ' class="sc' . $script_key . '"';
   2455                     }
   2456                     $result .= "<span$attributes>";
   2457                     $STRICTATTRS = $attributes;
   2458                 }
   2459             }
   2460 
   2461             if ($highlight_part) {
   2462                 // Now, highlight the code in this block. This code
   2463                 // is really the engine of GeSHi (along with the method
   2464                 // parse_non_string_part).
   2465 
   2466                 // cache comment regexps incrementally
   2467                 $next_comment_regexp_key = '';
   2468                 $next_comment_regexp_pos = -1;
   2469                 $next_comment_multi_pos = -1;
   2470                 $next_comment_single_pos = -1;
   2471                 $comment_regexp_cache_per_key = array();
   2472                 $comment_multi_cache_per_key = array();
   2473                 $comment_single_cache_per_key = array();
   2474                 $next_open_comment_multi = '';
   2475                 $next_comment_single_key = '';
   2476                 $escape_regexp_cache_per_key = array();
   2477                 $next_escape_regexp_key = '';
   2478                 $next_escape_regexp_pos = -1;
   2479 
   2480                 $length = strlen($part);
   2481                 for ($i = 0; $i < $length; ++$i) {
   2482                     // Get the next char
   2483                     $char = $part[$i];
   2484                     $char_len = 1;
   2485 
   2486                     // update regexp comment cache if needed
   2487                     if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
   2488                         $next_comment_regexp_pos = $length;
   2489                         foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
   2490                             $match_i = false;
   2491                             if (isset($comment_regexp_cache_per_key[$comment_key]) &&
   2492                                 ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
   2493                                  $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
   2494                                 // we have already matched something
   2495                                 if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
   2496                                     // this comment is never matched
   2497                                     continue;
   2498                                 }
   2499                                 $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
   2500                             } elseif (
   2501                                 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
   2502                                 (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) ||
   2503                                 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
   2504                                 ) {
   2505                                 $match_i = $match[0][1];
   2506                                 if (GESHI_PHP_PRE_433) {
   2507                                     $match_i += $i;
   2508                                 }
   2509 
   2510                                 $comment_regexp_cache_per_key[$comment_key] = array(
   2511                                     'key' => $comment_key,
   2512                                     'length' => strlen($match[0][0]),
   2513                                     'pos' => $match_i
   2514                                 );
   2515                             } else {
   2516                                 $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
   2517                                 continue;
   2518                             }
   2519 
   2520                             if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
   2521                                 $next_comment_regexp_pos = $match_i;
   2522                                 $next_comment_regexp_key = $comment_key;
   2523                                 if ($match_i === $i) {
   2524                                     break;
   2525                                 }
   2526                             }
   2527                         }
   2528                     }
   2529 
   2530                     $string_started = false;
   2531 
   2532                     if (isset($is_string_starter[$char])) {
   2533                         // Possibly the start of a new string ...
   2534 
   2535                         //Check which starter it was ...
   2536                         //Fix for SF#1932083: Multichar Quotemarks unsupported
   2537                         if (is_array($is_string_starter[$char])) {
   2538                             $char_new = '';
   2539                             foreach ($is_string_starter[$char] as $testchar) {
   2540                                 if ($testchar === substr($part, $i, strlen($testchar)) &&
   2541                                     strlen($testchar) > strlen($char_new)) {
   2542                                     $char_new = $testchar;
   2543                                     $string_started = true;
   2544                                 }
   2545                             }
   2546                             if ($string_started) {
   2547                                 $char = $char_new;
   2548                             }
   2549                         } else {
   2550                             $testchar = $is_string_starter[$char];
   2551                             if ($testchar === substr($part, $i, strlen($testchar))) {
   2552                                 $char = $testchar;
   2553                                 $string_started = true;
   2554                             }
   2555                         }
   2556                         $char_len = strlen($char);
   2557                     }
   2558 
   2559                     if ($string_started && ($i != $next_comment_regexp_pos)) {
   2560                         // Hand out the correct style information for this string
   2561                         $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
   2562                         if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
   2563                             !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
   2564                             $string_key = 0;
   2565                         }
   2566 
   2567                         // parse the stuff before this
   2568                         $result .= $this->parse_non_string_part($stuff_to_parse);
   2569                         $stuff_to_parse = '';
   2570 
   2571                         if (!$this->use_classes) {
   2572                             $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
   2573                         } else {
   2574                             $string_attributes = ' class="st'.$string_key.'"';
   2575                         }
   2576 
   2577                         // now handle the string
   2578                         $string = "<span$string_attributes>" . GeSHi::hsc($char);
   2579                         $start = $i + $char_len;
   2580                         $string_open = true;
   2581 
   2582                         if(empty($this->language_data['ESCAPE_REGEXP'])) {
   2583                             $next_escape_regexp_pos = $length;
   2584                         }
   2585 
   2586                         do {
   2587                             //Get the regular ending pos ...
   2588                             $close_pos = strpos($part, $char, $start);
   2589                             if(false === $close_pos) {
   2590                                 $close_pos = $length;
   2591                             }
   2592 
   2593                             if($this->lexic_permissions['ESCAPE_CHAR']) {
   2594                                 // update escape regexp cache if needed
   2595                                 if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
   2596                                     $next_escape_regexp_pos = $length;
   2597                                     foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
   2598                                         $match_i = false;
   2599                                         if (isset($escape_regexp_cache_per_key[$escape_key]) &&
   2600                                             ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
   2601                                              $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
   2602                                             // we have already matched something
   2603                                             if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
   2604                                                 // this comment is never matched
   2605                                                 continue;
   2606                                             }
   2607                                             $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
   2608                                         } elseif (
   2609                                             //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
   2610                                             (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) ||
   2611                                             (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start))
   2612                                             ) {
   2613                                             $match_i = $match[0][1];
   2614                                             if (GESHI_PHP_PRE_433) {
   2615                                                 $match_i += $start;
   2616                                             }
   2617 
   2618                                             $escape_regexp_cache_per_key[$escape_key] = array(
   2619                                                 'key' => $escape_key,
   2620                                                 'length' => strlen($match[0][0]),
   2621                                                 'pos' => $match_i
   2622                                             );
   2623                                         } else {
   2624                                             $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
   2625                                             continue;
   2626                                         }
   2627 
   2628                                         if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
   2629                                             $next_escape_regexp_pos = $match_i;
   2630                                             $next_escape_regexp_key = $escape_key;
   2631                                             if ($match_i === $start) {
   2632                                                 break;
   2633                                             }
   2634                                         }
   2635                                     }
   2636                                 }
   2637 
   2638                                 //Find the next simple escape position
   2639                                 if('' != $this->language_data['ESCAPE_CHAR']) {
   2640                                     $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
   2641                                     if(false === $simple_escape) {
   2642                                         $simple_escape = $length;
   2643                                     }
   2644                                 } else {
   2645                                     $simple_escape = $length;
   2646                                 }
   2647                             } else {
   2648                                 $next_escape_regexp_pos = $length;
   2649                                 $simple_escape = $length;
   2650                             }
   2651 
   2652                             if($simple_escape < $next_escape_regexp_pos &&
   2653                                 $simple_escape < $length &&
   2654                                 $simple_escape < $close_pos) {
   2655                                 //The nexxt escape sequence is a simple one ...
   2656                                 $es_pos = $simple_escape;
   2657 
   2658                                 //Add the stuff not in the string yet ...
   2659                                 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
   2660 
   2661                                 //Get the style for this escaped char ...
   2662                                 if (!$this->use_classes) {
   2663                                     $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
   2664                                 } else {
   2665                                     $escape_char_attributes = ' class="es0"';
   2666                                 }
   2667 
   2668                                 //Add the style for the escape char ...
   2669                                 $string .= "<span$escape_char_attributes>" .
   2670                                     GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
   2671 
   2672                                 //Get the byte AFTER the ESCAPE_CHAR we just found
   2673                                 $es_char = $part[$es_pos + 1];
   2674                                 if ($es_char == "\n") {
   2675                                     // don't put a newline around newlines
   2676                                     $string .= "</span>\n";
   2677                                     $start = $es_pos + 2;
   2678                                 } elseif (ord($es_char) >= 128) {
   2679                                     //This is an non-ASCII char (UTF8 or single byte)
   2680                                     //This code tries to work around SF#2037598 ...
   2681                                     if(function_exists('mb_substr')) {
   2682                                         $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
   2683                                         $string .= $es_char_m . '</span>';
   2684                                     } elseif (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) {
   2685                                         if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
   2686                                             "|\xE0[\xA0-\xBF][\x80-\xBF]".
   2687                                             "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
   2688                                             "|\xED[\x80-\x9F][\x80-\xBF]".
   2689                                             "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
   2690                                             "|[\xF1-\xF3][\x80-\xBF]{3}".
   2691                                             "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
   2692                                             $part, $es_char_m, null, $es_pos + 1)) {
   2693                                             $es_char_m = $es_char_m[0];
   2694                                         } else {
   2695                                             $es_char_m = $es_char;
   2696                                         }
   2697                                         $string .= $this->hsc($es_char_m) . '</span>';
   2698                                     } else {
   2699                                         $es_char_m = $this->hsc($es_char);
   2700                                     }
   2701                                     $start = $es_pos + strlen($es_char_m) + 1;
   2702                                 } else {
   2703                                     $string .= $this->hsc($es_char) . '</span>';
   2704                                     $start = $es_pos + 2;
   2705                                 }
   2706                             } elseif ($next_escape_regexp_pos < $length &&
   2707                                 $next_escape_regexp_pos < $close_pos) {
   2708                                 $es_pos = $next_escape_regexp_pos;
   2709                                 //Add the stuff not in the string yet ...
   2710                                 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
   2711 
   2712                                 //Get the key and length of this match ...
   2713                                 $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
   2714                                 $escape_str = substr($part, $es_pos, $escape['length']);
   2715                                 $escape_key = $escape['key'];
   2716 
   2717                                 //Get the style for this escaped char ...
   2718                                 if (!$this->use_classes) {
   2719                                     $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
   2720                                 } else {
   2721                                     $escape_char_attributes = ' class="es' . $escape_key . '"';
   2722                                 }
   2723 
   2724                                 //Add the style for the escape char ...
   2725                                 $string .= "<span$escape_char_attributes>" .
   2726                                     $this->hsc($escape_str) . '</span>';
   2727 
   2728                                 $start = $es_pos + $escape['length'];
   2729                             } else {
   2730                                 //Copy the remainder of the string ...
   2731                                 $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
   2732                                 $start = $close_pos + $char_len;
   2733                                 $string_open = false;
   2734                             }
   2735                         } while($string_open);
   2736 
   2737                         if ($check_linenumbers) {
   2738                             // Are line numbers used? If, we should end the string before
   2739                             // the newline and begin it again (so when <li>s are put in the source
   2740                             // remains XHTML compliant)
   2741                             // note to self: This opens up possibility of config files specifying
   2742                             // that languages can/cannot have multiline strings???
   2743                             $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
   2744                         }
   2745 
   2746                         $result .= $string;
   2747                         $string = '';
   2748                         $i = $start - 1;
   2749                         continue;
   2750                     } elseif ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
   2751                         substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
   2752                         // The start of a hard quoted string
   2753                         if (!$this->use_classes) {
   2754                             $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
   2755                             $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
   2756                         } else {
   2757                             $string_attributes = ' class="st_h"';
   2758                             $escape_char_attributes = ' class="es_h"';
   2759                         }
   2760                         // parse the stuff before this
   2761                         $result .= $this->parse_non_string_part($stuff_to_parse);
   2762                         $stuff_to_parse = '';
   2763 
   2764                         // now handle the string
   2765                         $string = '';
   2766 
   2767                         // look for closing quote
   2768                         $start = $i + $hq_strlen;
   2769                         while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
   2770                             $start = $close_pos + 1;
   2771                             if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR'] &&
   2772                                 (($i + $hq_strlen) != ($close_pos))) { //Support empty string for HQ escapes if Starter = Escape
   2773                                 // make sure this quote is not escaped
   2774                                 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
   2775                                     if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
   2776                                         // check wether this quote is escaped or if it is something like '\\'
   2777                                         $escape_char_pos = $close_pos - 1;
   2778                                         while ($escape_char_pos > 0
   2779                                                 && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
   2780                                             --$escape_char_pos;
   2781                                         }
   2782                                         if (($close_pos - $escape_char_pos) & 1) {
   2783                                             // uneven number of escape chars => this quote is escaped
   2784                                             continue 2;
   2785                                         }
   2786                                     }
   2787                                 }
   2788                             }
   2789 
   2790                             // found closing quote
   2791                             break;
   2792                         }
   2793 
   2794                         //Found the closing delimiter?
   2795                         if (!$close_pos) {
   2796                             // span till the end of this $part when no closing delimiter is found
   2797                             $close_pos = $length;
   2798                         }
   2799 
   2800                         //Get the actual string
   2801                         $string = substr($part, $i, $close_pos - $i + 1);
   2802                         $i = $close_pos;
   2803 
   2804                         // handle escape chars and encode html chars
   2805                         // (special because when we have escape chars within our string they may not be escaped)
   2806                         if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
   2807                             $start = 0;
   2808                             $new_string = '';
   2809                             while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
   2810                                 // hmtl escape stuff before
   2811                                 $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
   2812                                 // check if this is a hard escape
   2813                                 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
   2814                                     if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
   2815                                         // indeed, this is a hardescape
   2816                                         $new_string .= "<span$escape_char_attributes>" .
   2817                                             $this->hsc($hardescape) . '</span>';
   2818                                         $start = $es_pos + strlen($hardescape);
   2819                                         continue 2;
   2820                                     }
   2821                                 }
   2822                                 // not a hard escape, but a normal escape
   2823                                 // they come in pairs of two
   2824                                 $c = 0;
   2825                                 while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
   2826                                     && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
   2827                                     && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
   2828                                     $c += 2;
   2829                                 }
   2830                                 if ($c) {
   2831                                     $new_string .= "<span$escape_char_attributes>" .
   2832                                         str_repeat($escaped_escape_char, $c) .
   2833                                         '</span>';
   2834                                     $start = $es_pos + $c;
   2835                                 } else {
   2836                                     // this is just a single lonely escape char...
   2837                                     $new_string .= $escaped_escape_char;
   2838                                     $start = $es_pos + 1;
   2839                                 }
   2840                             }
   2841                             $string = $new_string . $this->hsc(substr($string, $start));
   2842                         } else {
   2843                             $string = $this->hsc($string);
   2844                         }
   2845 
   2846                         if ($check_linenumbers) {
   2847                             // Are line numbers used? If, we should end the string before
   2848                             // the newline and begin it again (so when <li>s are put in the source
   2849                             // remains XHTML compliant)
   2850                             // note to self: This opens up possibility of config files specifying
   2851                             // that languages can/cannot have multiline strings???
   2852                             $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
   2853                         }
   2854 
   2855                         $result .= "<span$string_attributes>" . $string . '</span>';
   2856                         $string = '';
   2857                         continue;
   2858                     } else {
   2859                         //Have a look for regexp comments
   2860                         if ($i == $next_comment_regexp_pos) {
   2861                             $COMMENT_MATCHED = true;
   2862                             $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
   2863                             $test_str = $this->hsc(substr($part, $i, $comment['length']));
   2864 
   2865                             //@todo If remove important do remove here
   2866                             if ($this->lexic_permissions['COMMENTS']['MULTI']) {
   2867                                 if (!$this->use_classes) {
   2868                                     $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
   2869                                 } else {
   2870                                     $attributes = ' class="co' . $comment['key'] . '"';
   2871                                 }
   2872 
   2873                                 $test_str = "<span$attributes>" . $test_str . "</span>";
   2874 
   2875                                 // Short-cut through all the multiline code
   2876                                 if ($check_linenumbers) {
   2877                                     // strreplace to put close span and open span around multiline newlines
   2878                                     $test_str = str_replace(
   2879                                         "\n", "</span>\n<span$attributes>",
   2880                                         str_replace("\n ", "\n&nbsp;", $test_str)
   2881                                     );
   2882                                 }
   2883                             }
   2884 
   2885                             $i += $comment['length'] - 1;
   2886 
   2887                             // parse the rest
   2888                             $result .= $this->parse_non_string_part($stuff_to_parse);
   2889                             $stuff_to_parse = '';
   2890                         }
   2891 
   2892                         // If we haven't matched a regexp comment, try multi-line comments
   2893                         if (!$COMMENT_MATCHED) {
   2894                             // Is this a multiline comment?
   2895                             if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
   2896                                 $next_comment_multi_pos = $length;
   2897                                 foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
   2898                                     $match_i = false;
   2899                                     if (isset($comment_multi_cache_per_key[$open]) &&
   2900                                         ($comment_multi_cache_per_key[$open] >= $i ||
   2901                                          $comment_multi_cache_per_key[$open] === false)) {
   2902                                         // we have already matched something
   2903                                         if ($comment_multi_cache_per_key[$open] === false) {
   2904                                             // this comment is never matched
   2905                                             continue;
   2906                                         }
   2907                                         $match_i = $comment_multi_cache_per_key[$open];
   2908                                     } elseif (($match_i = stripos($part, $open, $i)) !== false) {
   2909                                         $comment_multi_cache_per_key[$open] = $match_i;
   2910                                     } else {
   2911                                         $comment_multi_cache_per_key[$open] = false;
   2912                                         continue;
   2913                                     }
   2914                                     if ($match_i !== false && $match_i < $next_comment_multi_pos) {
   2915                                         $next_comment_multi_pos = $match_i;
   2916                                         $next_open_comment_multi = $open;
   2917                                         if ($match_i === $i) {
   2918                                             break;
   2919                                         }
   2920                                     }
   2921                                 }
   2922                             }
   2923                             if ($i == $next_comment_multi_pos) {
   2924                                 $open = $next_open_comment_multi;
   2925                                 $close = $this->language_data['COMMENT_MULTI'][$open];
   2926                                 $open_strlen = strlen($open);
   2927                                 $close_strlen = strlen($close);
   2928                                 $COMMENT_MATCHED = true;
   2929                                 $test_str_match = $open;
   2930                                 //@todo If remove important do remove here
   2931                                 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
   2932                                     $open == GESHI_START_IMPORTANT) {
   2933                                     if ($open != GESHI_START_IMPORTANT) {
   2934                                         if (!$this->use_classes) {
   2935                                             $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
   2936                                         } else {
   2937                                             $attributes = ' class="coMULTI"';
   2938                                         }
   2939                                         $test_str = "<span$attributes>" . $this->hsc($open);
   2940                                     } else {
   2941                                         if (!$this->use_classes) {
   2942                                             $attributes = ' style="' . $this->important_styles . '"';
   2943                                         } else {
   2944                                             $attributes = ' class="imp"';
   2945                                         }
   2946 
   2947                                         // We don't include the start of the comment if it's an
   2948                                         // "important" part
   2949                                         $test_str = "<span$attributes>";
   2950                                     }
   2951                                 } else {
   2952                                     $test_str = $this->hsc($open);
   2953                                 }
   2954 
   2955                                 $close_pos = strpos( $part, $close, $i + $open_strlen );
   2956 
   2957                                 if ($close_pos === false) {
   2958                                     $close_pos = $length;
   2959                                 }
   2960 
   2961                                 // Short-cut through all the multiline code
   2962                                 $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
   2963                                 if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
   2964                                     $test_str_match == GESHI_START_IMPORTANT) &&
   2965                                     $check_linenumbers) {
   2966 
   2967                                     // strreplace to put close span and open span around multiline newlines
   2968                                     $test_str .= str_replace(
   2969                                         "\n", "</span>\n<span$attributes>",
   2970                                         str_replace("\n ", "\n&nbsp;", $rest_of_comment)
   2971                                     );
   2972                                 } else {
   2973                                     $test_str .= $rest_of_comment;
   2974                                 }
   2975 
   2976                                 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
   2977                                     $test_str_match == GESHI_START_IMPORTANT) {
   2978                                     $test_str .= '</span>';
   2979                                 }
   2980 
   2981                                 $i = $close_pos + $close_strlen - 1;
   2982 
   2983                                 // parse the rest
   2984                                 $result .= $this->parse_non_string_part($stuff_to_parse);
   2985                                 $stuff_to_parse = '';
   2986                             }
   2987                         }
   2988 
   2989                         // If we haven't matched a multiline comment, try single-line comments
   2990                         if (!$COMMENT_MATCHED) {
   2991                             // cache potential single line comment occurances
   2992                             if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
   2993                                 $next_comment_single_pos = $length;
   2994                                 foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
   2995                                     $match_i = false;
   2996                                     if (isset($comment_single_cache_per_key[$comment_key]) &&
   2997                                         ($comment_single_cache_per_key[$comment_key] >= $i ||
   2998                                          $comment_single_cache_per_key[$comment_key] === false)) {
   2999                                         // we have already matched something
   3000                                         if ($comment_single_cache_per_key[$comment_key] === false) {
   3001                                             // this comment is never matched
   3002                                             continue;
   3003                                         }
   3004                                         $match_i = $comment_single_cache_per_key[$comment_key];
   3005                                     } elseif (
   3006                                         // case sensitive comments
   3007                                         ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
   3008                                         ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
   3009                                         // non case sensitive
   3010                                         (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
   3011                                           (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
   3012                                         $comment_single_cache_per_key[$comment_key] = $match_i;
   3013                                     } else {
   3014                                         $comment_single_cache_per_key[$comment_key] = false;
   3015                                         continue;
   3016                                     }
   3017                                     if ($match_i !== false && $match_i < $next_comment_single_pos) {
   3018                                         $next_comment_single_pos = $match_i;
   3019                                         $next_comment_single_key = $comment_key;
   3020                                         if ($match_i === $i) {
   3021                                             break;
   3022                                         }
   3023                                     }
   3024                                 }
   3025                             }
   3026                             if ($next_comment_single_pos == $i) {
   3027                                 $comment_key = $next_comment_single_key;
   3028                                 $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
   3029                                 $com_len = strlen($comment_mark);
   3030 
   3031                                 // This check will find special variables like $# in bash
   3032                                 // or compiler directives of Delphi beginning {$
   3033                                 if ((empty($sc_disallowed_before) || ($i == 0) ||
   3034                                     (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
   3035                                     (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
   3036                                     (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
   3037                                 {
   3038                                     // this is a valid comment
   3039                                     $COMMENT_MATCHED = true;
   3040                                     if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
   3041                                         if (!$this->use_classes) {
   3042                                             $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
   3043                                         } else {
   3044                                             $attributes = ' class="co' . $comment_key . '"';
   3045                                         }
   3046                                         $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
   3047                                     } else {
   3048                                         $test_str = $this->hsc($comment_mark);
   3049                                     }
   3050 
   3051                                     //Check if this comment is the last in the source
   3052                                     $close_pos = strpos($part, "\n", $i);
   3053                                     $oops = false;
   3054                                     if ($close_pos === false) {
   3055                                         $close_pos = $length;
   3056                                         $oops = true;
   3057                                     }
   3058                                     $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
   3059                                     if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
   3060                                         $test_str .= "</span>";
   3061                                     }
   3062 
   3063                                     // Take into account that the comment might be the last in the source
   3064                                     if (!$oops) {
   3065                                       $test_str .= "\n";
   3066                                     }
   3067 
   3068                                     $i = $close_pos;
   3069 
   3070                                     // parse the rest
   3071                                     $result .= $this->parse_non_string_part($stuff_to_parse);
   3072                                     $stuff_to_parse = '';
   3073                                 }
   3074                             }
   3075                         }
   3076                     }
   3077 
   3078                     // Where are we adding this char?
   3079                     if (!$COMMENT_MATCHED) {
   3080                         $stuff_to_parse .= $char;
   3081                     } else {
   3082                         $result .= $test_str;
   3083                         unset($test_str);
   3084                         $COMMENT_MATCHED = false;
   3085                     }
   3086                 }
   3087                 // Parse the last bit
   3088                 $result .= $this->parse_non_string_part($stuff_to_parse);
   3089                 $stuff_to_parse = '';
   3090             } else {
   3091                 $result .= $this->hsc($part);
   3092             }
   3093             // Close the <span> that surrounds the block
   3094             if ($STRICTATTRS != '') {
   3095                 $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
   3096                 $result .= '</span>';
   3097             }
   3098 
   3099             $endresult .= $result;
   3100             unset($part, $parts[$key], $result);
   3101         }
   3102 
   3103         //This fix is related to SF#1923020, but has to be applied regardless of
   3104         //actually highlighting symbols.
   3105         /** NOTE: memorypeak #3 */
   3106         $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
   3107 
   3108 //        // Parse the last stuff (redundant?)
   3109 //        $result .= $this->parse_non_string_part($stuff_to_parse);
   3110 
   3111         // Lop off the very first and last spaces
   3112 //        $result = substr($result, 1, -1);
   3113 
   3114         // We're finished: stop timing
   3115         $this->set_time($start_time, microtime());
   3116 
   3117         $this->finalise($endresult);
   3118         return $endresult;
   3119     }
   3120 
   3121     /**
   3122      * Swaps out spaces and tabs for HTML indentation. Not needed if
   3123      * the code is in a pre block...
   3124      *
   3125      * @param  string The source to indent (reference!)
   3126      * @since  1.0.0
   3127      * @access private
   3128      */
   3129     function indent(&$result) {
   3130         /// Replace tabs with the correct number of spaces
   3131         if (false !== strpos($result, "\t")) {
   3132             $lines = explode("\n", $result);
   3133             $result = null;//Save memory while we process the lines individually
   3134             $tab_width = $this->get_real_tab_width();
   3135             $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);
   3136 
   3137             for ($key = 0, $n = count($lines); $key < $n; $key++) {
   3138                 $line = $lines[$key];
   3139                 if (false === strpos($line, "\t")) {
   3140                     continue;
   3141                 }
   3142 
   3143                 $pos = 0;
   3144                 $length = strlen($line);
   3145                 $lines[$key] = ''; // reduce memory
   3146 
   3147                 $IN_TAG = false;
   3148                 for ($i = 0; $i < $length; ++$i) {
   3149                     $char = $line[$i];
   3150                     // Simple engine to work out whether we're in a tag.
   3151                     // If we are we modify $pos. This is so we ignore HTML
   3152                     // in the line and only workout the tab replacement
   3153                     // via the actual content of the string
   3154                     // This test could be improved to include strings in the
   3155                     // html so that < or > would be allowed in user's styles
   3156                     // (e.g. quotes: '<' '>'; or similar)
   3157                     if ($IN_TAG) {
   3158                         if ('>' == $char) {
   3159                             $IN_TAG = false;
   3160                         }
   3161                         $lines[$key] .= $char;
   3162                     } elseif ('<' == $char) {
   3163                         $IN_TAG = true;
   3164                         $lines[$key] .= '<';
   3165                     } elseif ('&' == $char) {
   3166                         $substr = substr($line, $i + 3, 5);
   3167                         $posi = strpos($substr, ';');
   3168                         if (false === $posi) {
   3169                             ++$pos;
   3170                         } else {
   3171                             $pos -= $posi+2;
   3172                         }
   3173                         $lines[$key] .= $char;
   3174                     } elseif ("\t" == $char) {
   3175                         $str = '';
   3176                         // OPTIMISE - move $strs out. Make an array:
   3177                         // $tabs = array(
   3178                         //  1 => '&nbsp;',
   3179                         //  2 => '&nbsp; ',
   3180                         //  3 => '&nbsp; &nbsp;' etc etc
   3181                         // to use instead of building a string every time
   3182                         $tab_end_width = $tab_width - ($pos % $tab_width); //Moved out of the look as it doesn't change within the loop
   3183                         if (($pos & 1) || 1 == $tab_end_width) {
   3184                             $str .= substr($tab_string, 6, $tab_end_width);
   3185                         } else {
   3186                             $str .= substr($tab_string, 0, $tab_end_width+5);
   3187                         }
   3188                         $lines[$key] .= $str;
   3189                         $pos += $tab_end_width;
   3190 
   3191                         if (false === strpos($line, "\t", $i + 1)) {
   3192                             $lines[$key] .= substr($line, $i + 1);
   3193                             break;
   3194                         }
   3195                     } elseif (0 == $pos && ' ' == $char) {
   3196                         $lines[$key] .= '&nbsp;';
   3197                         ++$pos;
   3198                     } else {
   3199                         $lines[$key] .= $char;
   3200                         ++$pos;
   3201                     }
   3202                 }
   3203             }
   3204             $result = implode("\n", $lines);
   3205             unset($lines);//We don't need the lines separated beyond this --- free them!
   3206         }
   3207         // Other whitespace
   3208         // BenBE: Fix to reduce the number of replacements to be done
   3209         $result = preg_replace('/^ /m', '&nbsp;', $result);
   3210         $result = str_replace('  ', ' &nbsp;', $result);
   3211 
   3212         if ($this->line_numbers == GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
   3213             if ($this->line_ending === null) {
   3214                 $result = nl2br($result);
   3215             } else {
   3216                 $result = str_replace("\n", $this->line_ending, $result);
   3217             }
   3218         }
   3219     }
   3220 
   3221     /**
   3222      * Changes the case of a keyword for those languages where a change is asked for
   3223      *
   3224      * @param  string The keyword to change the case of
   3225      * @return string The keyword with its case changed
   3226      * @since  1.0.0
   3227      * @access private
   3228      */
   3229     function change_case($instr) {
   3230         switch ($this->language_data['CASE_KEYWORDS']) {
   3231             case GESHI_CAPS_UPPER:
   3232                 return strtoupper($instr);
   3233             case GESHI_CAPS_LOWER:
   3234                 return strtolower($instr);
   3235             default:
   3236                 return $instr;
   3237         }
   3238     }
   3239 
   3240     /**
   3241      * Handles replacements of keywords to include markup and links if requested
   3242      *
   3243      * @param  array  The regex match array: index 0 is the full match, index 1 the captured keyword
   3244      * @return string The HTML for the match found
   3245      * @since  1.0.8
   3246      * @access private
   3247      *
   3248      * @todo   Get rid of ender in keyword links
   3249      */
   3250     function handle_keyword_replace($match) {
   3251         $k = $this->_kw_replace_group;
   3252         $keyword = $match[0];
   3253         $keyword_match = $match[1];
   3254 
   3255         $before = '';
   3256         $after = '';
   3257 
   3258         if ($this->keyword_links) {
   3259             // Keyword links have been ebabled
   3260 
   3261             if (isset($this->language_data['URLS'][$k]) &&
   3262                 $this->language_data['URLS'][$k] != '') {
   3263                 // There is a base group for this keyword
   3264 
   3265                 // Old system: strtolower
   3266                 //$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
   3267                 // New system: get keyword from language file to get correct case
   3268                 if (!$this->language_data['CASE_SENSITIVE'][$k] &&
   3269                     strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
   3270                     foreach ($this->language_data['KEYWORDS'][$k] as $word) {
   3271                         if (strcasecmp($word, $keyword_match) == 0) {
   3272                             break;
   3273                         }
   3274                     }
   3275                 } else {
   3276                     $word = $keyword_match;
   3277                 }
   3278 
   3279                 $before = '<|UR1|"' .
   3280                     str_replace(
   3281                         array(
   3282                             '{FNAME}',
   3283                             '{FNAMEL}',
   3284                             '{FNAMEU}',
   3285                             '.'),
   3286                         array(
   3287                             str_replace('+', '%20', urlencode($this->hsc($word))),
   3288                             str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
   3289                             str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
   3290                             '<DOT>'),
   3291                         $this->language_data['URLS'][$k]
   3292                     ) . '">';
   3293                 $after = '</a>';
   3294             }
   3295         }
   3296 
   3297         return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
   3298     }
   3299 
   3300     /**
   3301      * handles regular expressions highlighting-definitions with callback functions
   3302      *
   3303      * @note this is a callback, don't use it directly
   3304      *
   3305      * @param array the matches array
   3306      * @return The highlighted string
   3307      * @since 1.0.8
   3308      * @access private
   3309      */
   3310     function handle_regexps_callback($matches) {
   3311         // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
   3312         return  ' style="' . call_user_func($this->language_data['STYLES']['REGEXPS'][$this->_rx_key], $matches[1]) . '"'. $matches[1] . '|>';
   3313     }
   3314 
   3315     /**
   3316      * handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
   3317      *
   3318      * @note this is a callback, don't use it directly
   3319      *
   3320      * @param array the matches array
   3321      * @return string
   3322      * @since 1.0.8
   3323      * @access private
   3324      */
   3325     function handle_multiline_regexps($matches) {
   3326         $before = $this->_hmr_before;
   3327         $after = $this->_hmr_after;
   3328         if ($this->_hmr_replace) {
   3329             $replace = $this->_hmr_replace;
   3330             $search = array();
   3331 
   3332             foreach (array_keys($matches) as $k) {
   3333                 $search[] = '\\' . $k;
   3334             }
   3335 
   3336             $before = str_replace($search, $matches, $before);
   3337             $after = str_replace($search, $matches, $after);
   3338             $replace = str_replace($search, $matches, $replace);
   3339         } else {
   3340             $replace = $matches[0];
   3341         }
   3342         return $before
   3343                     . '<|!REG3XP' . $this->_hmr_key .'!>'
   3344                         . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace)
   3345                     . '|>'
   3346               . $after;
   3347     }
   3348 
   3349     /**
   3350      * Takes a string that has no strings or comments in it, and highlights
   3351      * stuff like keywords, numbers and methods.
   3352      *
   3353      * @param string The string to parse for keyword, numbers etc.
   3354      * @since 1.0.0
   3355      * @access private
   3356      * @todo BUGGY! Why? Why not build string and return?
   3357      */
   3358     function parse_non_string_part($stuff_to_parse) {
                // Overview: the input is HTML-escaped with hsc() and then run through
                // a fixed sequence of passes (keywords, REGEXPS, numbers, object
                // methods, brackets, symbols). Each pass wraps what it finds in
                // temporary "<|...>" / "|>" markers; only at the very end are these
                // markers turned into "<span...>" / "</span>". The order of the
                // passes matters because later passes see the markers of earlier ones.
                //
                // A single space is prepended here and stripped again by the final
                // substr(..., 1) - presumably so the look-behind assertions below
                // also work at the very start of the text (TODO confirm).
   3359         $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);
   3360
   3361         // Highlight keywords
                // Default look-behind / look-ahead character classes that must NOT
                // surround a keyword; the closing "])" is appended further down.
   3362         $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#|^&";
   3363         $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
   3364         if ($this->lexic_permissions['STRINGS']) {
   3365             $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
   3366             $disallowed_before .= $quotemarks;
   3367             $disallowed_after .= $quotemarks;
   3368         }
   3369         $disallowed_before .= "])";
   3370         $disallowed_after .= "])";
   3371
   3372         $parser_control_pergroup = false;
   3373         if (isset($this->language_data['PARSER_CONTROL'])) {
   3374             if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
   3375                 $x = 0; // check whether per-keyword-group parser_control is enabled
                        // $x counts the global DISALLOWED_* entries; any entry beyond
                        // those is taken to be a per-group override.
   3376                 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
   3377                     $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
   3378                     ++$x;
   3379                 }
   3380                 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
   3381                     $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
   3382                     ++$x;
   3383                 }
   3384                 $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
   3385             }
   3386         }
   3387
   3388         foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
                    // A group without an explicit permission entry is highlighted.
   3389             if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
   3390                 $this->lexic_permissions['KEYWORDS'][$k]) {
   3391
   3392                 $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
   3393                 $modifiers = $case_sensitive ? '' : 'i';
   3394
   3395                 // NEW in 1.0.8 - per-keyword-group parser control
   3396                 $disallowed_before_local = $disallowed_before;
   3397                 $disallowed_after_local = $disallowed_after;
   3398                 if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
   3399                     if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
   3400                         $disallowed_before_local =
   3401                             $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
   3402                     }
   3403
   3404                     if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
   3405                         $disallowed_after_local =
   3406                             $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
   3407                     }
   3408                 }
   3409
                        // handle_keyword_replace() reads the current group from here.
   3410                 $this->_kw_replace_group = $k;
   3411
   3412                 //NEW in 1.0.8, the cached regexp list
   3413                 // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
   3414                 for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set <  $set_length; ++$set) {
   3415                     $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
   3416                     // Might make a more unique string for putting the number in soon
   3417                     // Basically, we don't put the styles in yet because then the styles themselves will
   3418                     // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
   3419                     $stuff_to_parse = preg_replace_callback(
   3420                         "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php|aspx?))$disallowed_after_local/$modifiers",
   3421                         array($this, 'handle_keyword_replace'),
   3422                         $stuff_to_parse
   3423                         );
   3424                 }
   3425             }
   3426         }
   3427
   3428         // Regular expressions
                // Matches are tagged with a "!REG3XP<key>!" placeholder; the actual
                // class/style attribute is filled in near the end of this method.
                // When line numbers are enabled, the callback variant is used so the
                // marker is closed and reopened at every newline inside a match.
   3429         foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
   3430             if ($this->lexic_permissions['REGEXPS'][$key]) {
   3431                 if (is_array($regexp)) {
   3432                     if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   3433                         // produce valid HTML when we match multiple lines
   3434                         $this->_hmr_replace = $regexp[GESHI_REPLACE];
   3435                         $this->_hmr_before = $regexp[GESHI_BEFORE];
   3436                         $this->_hmr_key = $key;
   3437                         $this->_hmr_after = $regexp[GESHI_AFTER];
   3438                         $stuff_to_parse = preg_replace_callback(
   3439                             "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
   3440                             array($this, 'handle_multiline_regexps'),
   3441                             $stuff_to_parse);
   3442                         $this->_hmr_replace = false;
   3443                         $this->_hmr_before = '';
   3444                         $this->_hmr_after = '';
   3445                     } else {
   3446                         $stuff_to_parse = preg_replace(
   3447                             '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
   3448                             $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
   3449                             $stuff_to_parse);
   3450                     }
   3451                 } else {
   3452                     if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   3453                         // produce valid HTML when we match multiple lines
   3454                         $this->_hmr_key = $key;
   3455                         $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
   3456                                               array($this, 'handle_multiline_regexps'), $stuff_to_parse);
   3457                         $this->_hmr_key = '';
   3458                     } else {
   3459                         $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
   3460                     }
   3461                 }
   3462             }
   3463         }
   3464
   3465         // Highlight numbers. As of 1.0.8 we support different types of numbers
   3466         $numbers_found = false;
   3467
                // The PRECHECK_RX test skips the per-format replacements entirely
                // when it does not match the text.
   3468         if ($this->lexic_permissions['NUMBERS'] && preg_match($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'], $stuff_to_parse )) {
   3469             $numbers_found = true;
   3470
   3471             //For each of the formats ...
   3472             foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
   3473                 //Check if it should be highlighted ...
   3474                 $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
   3475             }
   3476         }
   3477
   3478         //
   3479         // Now that's all done, replace /[number]/ with the correct styles
   3480         //
   3481         foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
   3482             if (!$this->use_classes) {
   3483                 $attributes = ' style="' .
   3484                     (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
   3485                     $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
   3486             } else {
   3487                 $attributes = ' class="kw' . $k . '"';
   3488             }
   3489             $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
   3490         }
   3491
   3492         if ($numbers_found) {
   3493             // Put number styles in
   3494             foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
   3495                 //Commented out for now, as this needs some review ...
   3496                 //                if ($numbers_permissions & $id) {
   3497                 //Get the appropriate style ...
   3498                 //Checking for unset styles is done by the style cache builder ...
   3499                 if (!$this->use_classes) {
   3500                     $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
   3501                 } else {
   3502                     $attributes = ' class="nu'.$id.'"';
   3503                 }
   3504
   3505                 //Set in the correct styles ...
   3506                 $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
   3507                 //                }
   3508             }
   3509         }
   3510
   3511         // Highlight methods and fields in objects
   3512         if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
                    // Defaults for the pieces of the member pattern; each can be
                    // overridden through PARSER_CONTROL['OOLANG'].
   3513             $oolang_spaces = "[\s]*";
   3514             $oolang_before = "";
   3515             $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
   3516             if (isset($this->language_data['PARSER_CONTROL'])) {
   3517                 if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
   3518                     if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
   3519                         $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
   3520                     }
   3521                     if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
   3522                         $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
   3523                     }
   3524                     if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
   3525                         $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
   3526                     }
   3527                 }
   3528             }
   3529
   3530             foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
                        // Cheap substring test first; the regex only runs when the
                        // splitter actually occurs in the text.
   3531                 if (false !== strpos($stuff_to_parse, $splitter)) {
   3532                     if (!$this->use_classes) {
   3533                         $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
   3534                     } else {
   3535                         $attributes = ' class="me' . $key . '"';
   3536                     }
   3537                     $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
   3538                 }
   3539             }
   3540         }
   3541
   3542         //
   3543         // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
   3544         // You try it, and see what happens ;)
   3545         // TODO: Fix lexic permissions not converting entities if shouldn't
   3546         // be highlighting regardless
   3547         //
   3548         if ($this->lexic_permissions['BRACKETS']) {
   3549             $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
   3550                               $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
   3551         }
   3552
   3553
   3554         //FIX for symbol highlighting ...
   3555         if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
   3556             //Get all matches and throw away those within a block that is already highlighted... (i.e. matched by a regexp)
   3557             $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
                    // Offsets in $pot_symbols refer to the text as it was when matched;
                    // $global_offset accumulates how far earlier replacements have
                    // shifted everything that follows.
   3558             $global_offset = 0;
   3559             for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
   3560                 $symbol_match = $pot_symbols[$s_id][0][0];
   3561                 if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
   3562                     // already highlighted blocks _must_ include either < or >
   3563                     // so if this conditional applies, we have to skip this match
   3564                     // BenBE: UNLESS the block contains <SEMI> or <PIPE>
   3565                     if(strpos($symbol_match, '<SEMI>') === false &&
   3566                         strpos($symbol_match, '<PIPE>') === false) {
   3567                         continue;
   3568                     }
   3569                 }
   3570
   3571                 // if we reach this point, we have a valid match which needs to be highlighted
   3572
   3573                 $symbol_length = strlen($symbol_match);
   3574                 $symbol_offset = $pot_symbols[$s_id][0][1];
   3575                 unset($pot_symbols[$s_id]);
   3576                 $symbol_end = $symbol_length + $symbol_offset;
   3577                 $symbol_hl = "";
   3578
   3579                 // if we have multiple styles, we have to handle them properly
   3580                 if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
                            // -1 means "no marker opened yet" for this run of symbols.
   3581                     $old_sym = -1;
   3582                     // Split the current stuff to replace into its atomic symbols ...
   3583                     preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
   3584                     foreach ($sym_match_syms[0] as $sym_ms) {
   3585                         //Check if consecutive symbols belong to the same group to save output ...
   3586                         if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
   3587                             && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
   3588                             if (-1 != $old_sym) {
   3589                                 $symbol_hl .= "|>";
   3590                             }
   3591                             $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
   3592                             if (!$this->use_classes) {
   3593                                 $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
   3594                             } else {
   3595                                 $symbol_hl .= '<| class="sy' . $old_sym . '">';
   3596                             }
   3597                         }
   3598                         $symbol_hl .= $sym_ms;
   3599                     }
   3600                     unset($sym_match_syms);
   3601
   3602                     //Close remaining tags and insert the replacement at the right position ...
   3603                     //Take caution if symbol_hl is empty to avoid doubled closing spans.
   3604                     if (-1 != $old_sym) {
   3605                         $symbol_hl .= "|>";
   3606                     }
   3607                 } else {
   3608                     if (!$this->use_classes) {
   3609                         $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
   3610                     } else {
   3611                         $symbol_hl = '<| class="sy0">';
   3612                     }
   3613                     $symbol_hl .= $symbol_match . '|>';
   3614                 }
   3615
   3616                 $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);
   3617
   3618                 // since we replace old text with something of different size,
   3619                 // we'll have to keep track of the differences
   3620                 $global_offset += strlen($symbol_hl) - $symbol_length;
   3621             }
   3622         }
   3623         //FIX for symbol highlighting ...
   3624
   3625         // Add class/style for regexps
                // This resolves the "!REG3XP<key>!" placeholders inserted by the
                // REGEXPS pass above.
   3626         foreach (array_keys($this->language_data['REGEXPS']) as $key) {
   3627             if ($this->lexic_permissions['REGEXPS'][$key]) {
   3628                 if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
                            // A callable style is evaluated per match by
                            // handle_regexps_callback(), which reads the key from _rx_key.
   3629                     $this->_rx_key = $key;
   3630                     $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
   3631                         array($this, 'handle_regexps_callback'),
   3632                         $stuff_to_parse);
   3633                 } else {
   3634                     if (!$this->use_classes) {
   3635                         $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
   3636                     } else {
   3637                         if (is_array($this->language_data['REGEXPS'][$key]) &&
   3638                             array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
   3639                             $attributes = ' class="' .
   3640                                 $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
   3641                         } else {
   3642                            $attributes = ' class="re' . $key . '"';
   3643                         }
   3644                     }
   3645                     $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
   3646                 }
   3647             }
   3648         }
   3649
   3650         // Replace <DOT> with . for urls
   3651         $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
   3652         // Replace <|UR1| with <a href= for urls also
   3653         if (isset($this->link_styles[GESHI_LINK])) {
   3654             if ($this->use_classes) {
   3655                 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
   3656             } else {
   3657                 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
   3658             }
   3659         } else {
   3660             $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
   3661         }
   3662
   3663         //
   3664         // NOW we add the span thingy ;)
   3665         //
   3666
   3667         $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
   3668         $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
                // Drop the single space that was prepended at the top of the method.
   3669         return substr($stuff_to_parse, 1);
   3670     }
   3671 
   3672     /**
   3673      * Sets the time taken to parse the code
   3674      *
   3675      * @param microtime The time when parsing started
   3676      * @param microtime The time when parsing ended
   3677      * @since 1.0.2
   3678      * @access private
   3679      */
   3680     function set_time($start_time, $end_time) {
   3681         $start = explode(' ', $start_time);
   3682         $end = explode(' ', $end_time);
   3683         $this->time = $end[0] + $end[1] - $start[0] - $start[1];
   3684     }
   3685 
   3686     /**
   3687      * Gets the time taken to parse the code
   3688      *
   3689      * @return double The time taken to parse the code
   3690      * @since  1.0.2
   3691      */
   3692     function get_time() {
   3693         return $this->time;
   3694     }
   3695 
   3696     /**
   3697      * Merges arrays recursively, overwriting values of the first array with values of later arrays
   3698      *
   3699      * @since 1.0.8
   3700      * @access private
   3701      */
   3702     function merge_arrays() {
   3703         $arrays = func_get_args();
   3704         $narrays = count($arrays);
   3705 
   3706         // check arguments
   3707         // comment out if more performance is necessary (in this case the foreach loop will trigger a warning if the argument is not an array)
   3708         for ($i = 0; $i < $narrays; $i ++) {
   3709             if (!is_array($arrays[$i])) {
   3710                 // also array_merge_recursive returns nothing in this case
   3711                 trigger_error('Argument #' . ($i+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
   3712                 return false;
   3713             }
   3714         }
   3715 
   3716         // the first array is in the output set in every case
   3717         $ret = $arrays[0];
   3718 
   3719         // merege $ret with the remaining arrays
   3720         for ($i = 1; $i < $narrays; $i ++) {
   3721             foreach ($arrays[$i] as $key => $value) {
   3722                 if (is_array($value) && isset($ret[$key])) {
   3723                     // if $ret[$key] is not an array you try to merge an scalar value with an array - the result is not defined (incompatible arrays)
   3724                     // in this case the call will trigger an E_USER_WARNING and the $ret[$key] will be false.
   3725                     $ret[$key] = $this->merge_arrays($ret[$key], $value);
   3726                 } else {
   3727                     $ret[$key] = $value;
   3728                 }
   3729             }
   3730         }
   3731 
   3732         return $ret;
   3733     }
   3734 
   3735     /**
   3736      * Gets language information and stores it for later use
   3737      *
   3738      * @param string The filename of the language file you want to load
   3739      * @since 1.0.0
   3740      * @access private
   3741      * @todo Needs to load keys for lexic permissions for keywords, regexps etc
   3742      */
   3743     function load_language($file_name) {
   3744         if ($file_name == $this->loaded_language) {
   3745             // this file is already loaded!
   3746             return;
   3747         }
   3748 
   3749         //Prepare some stuff before actually loading the language file
   3750         $this->loaded_language = $file_name;
   3751         $this->parse_cache_built = false;
   3752         $this->enable_highlighting();
   3753         $language_data = array();
   3754 
   3755         //Load the language file
   3756         require $file_name;
   3757 
   3758         // Perhaps some checking might be added here later to check that
   3759         // $language data is a valid thing but maybe not
   3760         $this->language_data = $language_data;
   3761 
   3762         // Set strict mode if should be set
   3763         $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];
   3764 
   3765         // Set permissions for all lexics to true
   3766         // so they'll be highlighted by default
   3767         foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
   3768             if (!empty($this->language_data['KEYWORDS'][$key])) {
   3769                 $this->lexic_permissions['KEYWORDS'][$key] = true;
   3770             } else {
   3771                 $this->lexic_permissions['KEYWORDS'][$key] = false;
   3772             }
   3773         }
   3774 
   3775         foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) {
   3776             $this->lexic_permissions['COMMENTS'][$key] = true;
   3777         }
   3778         foreach (array_keys($this->language_data['REGEXPS']) as $key) {
   3779             $this->lexic_permissions['REGEXPS'][$key] = true;
   3780         }
   3781 
   3782         // for BenBE and future code reviews:
   3783         // we can use empty here since we only check for existance and emptiness of an array
   3784         // if it is not an array at all but rather false or null this will work as intended as well
   3785         // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice
   3786         if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
   3787             foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
   3788                 // it's either true or false and maybe is true as well
   3789                 $perm = $value !== GESHI_NEVER;
   3790                 if ($flag == 'ALL') {
   3791                     $this->enable_highlighting($perm);
   3792                     continue;
   3793                 }
   3794                 if (!isset($this->lexic_permissions[$flag])) {
   3795                     // unknown lexic permission
   3796                     continue;
   3797                 }
   3798                 if (is_array($this->lexic_permissions[$flag])) {
   3799                     foreach ($this->lexic_permissions[$flag] as $key => $val) {
   3800                         $this->lexic_permissions[$flag][$key] = $perm;
   3801                     }
   3802                 } else {
   3803                     $this->lexic_permissions[$flag] = $perm;
   3804                 }
   3805             }
   3806             unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
   3807         }
   3808 
   3809         //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
   3810         //You need to set one for HARDESCAPES only in this case.
   3811         if(!isset($this->language_data['HARDCHAR'])) {
   3812             $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
   3813         }
   3814 
   3815         //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
   3816         $style_filename = substr($file_name, 0, -4) . '.style.php';
   3817         if (is_readable($style_filename)) {
   3818             //Clear any style_data that could have been set before ...
   3819             if (isset($style_data)) {
   3820                 unset($style_data);
   3821             }
   3822 
   3823             //Read the Style Information from the style file
   3824             include $style_filename;
   3825 
   3826             //Apply the new styles to our current language styles
   3827             if (isset($style_data) && is_array($style_data)) {
   3828                 $this->language_data['STYLES'] =
   3829                     $this->merge_arrays($this->language_data['STYLES'], $style_data);
   3830             }
   3831         }
   3832     }
   3833 
   3834     /**
   3835      * Takes the parsed code and various options, and creates the HTML
   3836      * surrounding it to make it look nice.
   3837      *
   3838      * @param  string The code already parsed (reference!)
   3839      * @since  1.0.0
   3840      * @access private
   3841      */
   3842     function finalise(&$parsed_code) {
   3843         // Remove end parts of important declarations
   3844         // This is BUGGY!! My fault for bad code: fix coming in 1.2
   3845         // @todo Remove this crap
   3846         if ($this->enable_important_blocks &&
   3847             (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
   3848             $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
   3849         }
   3850 
   3851         // Add HTML whitespace stuff if we're using the <div> header
   3852         if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
   3853             $this->indent($parsed_code);
   3854         }
   3855 
   3856         // purge some unnecessary stuff
   3857         /** NOTE: memorypeak #1 */
   3858         $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);
   3859 
   3860         // If we are using IDs for line numbers, there needs to be an overall
   3861         // ID set to prevent collisions.
   3862         if ($this->add_ids && !$this->overall_id) {
   3863             $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
   3864         }
   3865 
   3866         // Get code into lines
   3867         /** NOTE: memorypeak #2 */
   3868         $code = explode("\n", $parsed_code);
   3869         $parsed_code = $this->header();
   3870 
   3871         // If we're using line numbers, we insert <li>s and appropriate
   3872         // markup to style them (otherwise we don't need to do anything)
   3873         if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
   3874             // If we're using the <pre> header, we shouldn't add newlines because
   3875             // the <pre> will line-break them (and the <li>s already do this for us)
   3876             $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';
   3877 
   3878             // Set vars to defaults for following loop
   3879             $i = 0;
   3880 
   3881             // Foreach line...
   3882             for ($i = 0, $n = count($code); $i < $n;) {
   3883                 //Reset the attributes for a new line ...
   3884                 $attrs = array();
   3885 
   3886                 // Make lines have at least one space in them if they're empty
   3887                 // BenBE: Checking emptiness using trim instead of relying on blanks
   3888                 if ('' == trim($code[$i])) {
   3889                     $code[$i] = '&nbsp;';
   3890                 }
   3891 
   3892                 // If this is a "special line"...
   3893                 if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
   3894                     $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
   3895                     // Set the attributes to style the line
   3896                     if ($this->use_classes) {
   3897                         //$attr = ' class="li2"';
   3898                         $attrs['class'][] = 'li2';
   3899                         $def_attr = ' class="de2"';
   3900                     } else {
   3901                         //$attr = ' style="' . $this->line_style2 . '"';
   3902                         $attrs['style'][] = $this->line_style2;
   3903                         // This style "covers up" the special styles set for special lines
   3904                         // so that styles applied to special lines don't apply to the actual
   3905                         // code on that line
   3906                         $def_attr = ' style="' . $this->code_style . '"';
   3907                     }
   3908                 } else {
   3909                     if ($this->use_classes) {
   3910                         //$attr = ' class="li1"';
   3911                         $attrs['class'][] = 'li1';
   3912                         $def_attr = ' class="de1"';
   3913                     } else {
   3914                         //$attr = ' style="' . $this->line_style1 . '"';
   3915                         $attrs['style'][] = $this->line_style1;
   3916                         $def_attr = ' style="' . $this->code_style . '"';
   3917                     }
   3918                 }
   3919 
   3920                 //Check which type of tag to insert for this line
   3921                 if ($this->header_type == GESHI_HEADER_PRE_VALID) {
   3922                     $start = "<pre$def_attr>";
   3923                     $end = '</pre>';
   3924                 } else {
   3925                     // Span or div?
   3926                     $start = "<div$def_attr>";
   3927                     $end = '</div>';
   3928                 }
   3929 
   3930                 ++$i;
   3931 
   3932                 // Are we supposed to use ids? If so, add them
   3933                 if ($this->add_ids) {
   3934                     $attrs['id'][] = "$this->overall_id-$i";
   3935                 }
   3936 
   3937                 //Is this some line with extra styles???
   3938                 if (in_array($i, $this->highlight_extra_lines)) {
   3939                     if ($this->use_classes) {
   3940                         if (isset($this->highlight_extra_lines_styles[$i])) {
   3941                             $attrs['class'][] = "lx$i";
   3942                         } else {
   3943                             $attrs['class'][] = "ln-xtra";
   3944                         }
   3945                     } else {
   3946                         array_push($attrs['style'], $this->get_line_style($i));
   3947                     }
   3948                 }
   3949 
   3950                 // Add in the line surrounded by appropriate list HTML
   3951                 $attr_string = '';
   3952                 foreach ($attrs as $key => $attr) {
   3953                     $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
   3954                 }
   3955 
   3956                 $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
   3957                 unset($code[$i - 1]);
   3958             }
   3959         } else {
   3960             $n = count($code);
   3961             if ($this->use_classes) {
   3962                 $attributes = ' class="de1"';
   3963             } else {
   3964                 $attributes = ' style="'. $this->code_style .'"';
   3965             }
   3966             if ($this->header_type == GESHI_HEADER_PRE_VALID) {
   3967                 $parsed_code .= '<pre'. $attributes .'>';
   3968             } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
   3969                 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   3970                     if ($this->use_classes) {
   3971                         $attrs = ' class="ln"';
   3972                     } else {
   3973                         $attrs = ' style="'. $this->table_linenumber_style .'"';
   3974                     }
   3975                     $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
   3976                     // get linenumbers
   3977                     // we don't merge it with the for below, since it should be better for
   3978                     // memory consumption this way
   3979                     // @todo: but... actually it would still be somewhat nice to merge the two loops
   3980                     //        the mem peaks are at different positions
   3981                     for ($i = 0; $i < $n; ++$i) {
   3982                         $close = 0;
   3983                         // fancy lines
   3984                         if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
   3985                             $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
   3986                             // Set the attributes to style the line
   3987                             if ($this->use_classes) {
   3988                                 $parsed_code .= '<span class="xtra li2"><span class="de2">';
   3989                             } else {
   3990                                 // This style "covers up" the special styles set for special lines
   3991                                 // so that styles applied to special lines don't apply to the actual
   3992                                 // code on that line
   3993                                 $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
   3994                                                   .'<span style="' . $this->code_style .'">';
   3995                             }
   3996                             $close += 2;
   3997                         }
   3998                         //Is this some line with extra styles???
   3999                         if (in_array($i + 1, $this->highlight_extra_lines)) {
   4000                             if ($this->use_classes) {
   4001                                 if (isset($this->highlight_extra_lines_styles[$i])) {
   4002                                     $parsed_code .= "<span class=\"xtra lx$i\">";
   4003                                 } else {
   4004                                     $parsed_code .= "<span class=\"xtra ln-xtra\">";
   4005                                 }
   4006                             } else {
   4007                                 $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
   4008                             }
   4009                             ++$close;
   4010                         }
   4011                         $parsed_code .= $this->line_numbers_start + $i;
   4012                         if ($close) {
   4013                             $parsed_code .= str_repeat('</span>', $close);
   4014                         } elseif ($i != $n) {
   4015                             $parsed_code .= "\n";
   4016                         }
   4017                     }
   4018                     $parsed_code .= '</pre></td><td'.$attributes.'>';
   4019                 }
   4020                 $parsed_code .= '<pre'. $attributes .'>';
   4021             }
   4022             // No line numbers, but still need to handle highlighting lines extra.
   4023             // Have to use divs so the full width of the code is highlighted
   4024             $close = 0;
   4025             for ($i = 0; $i < $n; ++$i) {
   4026                 // Make lines have at least one space in them if they're empty
   4027                 // BenBE: Checking emptiness using trim instead of relying on blanks
   4028                 if ('' == trim($code[$i])) {
   4029                     $code[$i] = '&nbsp;';
   4030                 }
   4031                 // fancy lines
   4032                 if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
   4033                     $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
   4034                     // Set the attributes to style the line
   4035                     if ($this->use_classes) {
   4036                         $parsed_code .= '<span class="xtra li2"><span class="de2">';
   4037                     } else {
   4038                         // This style "covers up" the special styles set for special lines
   4039                         // so that styles applied to special lines don't apply to the actual
   4040                         // code on that line
   4041                         $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
   4042                                           .'<span style="' . $this->code_style .'">';
   4043                     }
   4044                     $close += 2;
   4045                 }
   4046                 //Is this some line with extra styles???
   4047                 if (in_array($i + 1, $this->highlight_extra_lines)) {
   4048                     if ($this->use_classes) {
   4049                         if (isset($this->highlight_extra_lines_styles[$i])) {
   4050                             $parsed_code .= "<span class=\"xtra lx$i\">";
   4051                         } else {
   4052                             $parsed_code .= "<span class=\"xtra ln-xtra\">";
   4053                         }
   4054                     } else {
   4055                         $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
   4056                     }
   4057                     ++$close;
   4058                 }
   4059 
   4060                 $parsed_code .= $code[$i];
   4061 
   4062                 if ($close) {
   4063                   $parsed_code .= str_repeat('</span>', $close);
   4064                   $close = 0;
   4065                 }
   4066                 elseif ($i + 1 < $n) {
   4067                     $parsed_code .= "\n";
   4068                 }
   4069                 unset($code[$i]);
   4070             }
   4071 
   4072             if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
   4073                 $parsed_code .= '</pre>';
   4074             }
   4075             if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   4076                 $parsed_code .= '</td>';
   4077             }
   4078         }
   4079 
   4080         $parsed_code .= $this->footer();
   4081     }
   4082 
   4083     /**
   4084      * Creates the header for the code block (with correct attributes)
   4085      *
   4086      * @return string The header for the code block
   4087      * @since  1.0.0
   4088      * @access private
   4089      */
   4090     function header() {
   4091         // Get attributes needed
   4092         /**
   4093          * @todo   Document behaviour change - class is outputted regardless of whether
   4094          *         we're using classes or not. Same with style
   4095          */
   4096         $attributes = ' class="' . $this->_genCSSName($this->language);
   4097         if ($this->overall_class != '') {
   4098             $attributes .= " ".$this->_genCSSName($this->overall_class);
   4099         }
   4100         $attributes .= '"';
   4101 
   4102         if ($this->overall_id != '') {
   4103             $attributes .= " id=\"{$this->overall_id}\"";
   4104         }
   4105         if ($this->overall_style != '' && !$this->use_classes) {
   4106             $attributes .= ' style="' . $this->overall_style . '"';
   4107         }
   4108 
   4109         $ol_attributes = '';
   4110 
   4111         if ($this->line_numbers_start != 1) {
   4112             $ol_attributes .= ' start="' . $this->line_numbers_start . '"';
   4113         }
   4114 
   4115         // Get the header HTML
   4116         $header = $this->header_content;
   4117         if ($header) {
   4118             if ($this->header_type == GESHI_HEADER_PRE || $this->header_type == GESHI_HEADER_PRE_VALID) {
   4119                 $header = str_replace("\n", '', $header);
   4120             }
   4121             $header = $this->replace_keywords($header);
   4122 
   4123             if ($this->use_classes) {
   4124                 $attr = ' class="head"';
   4125             } else {
   4126                 $attr = " style=\"{$this->header_content_style}\"";
   4127             }
   4128             if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   4129                 $header = "<thead><tr><td colspan=\"2\" $attr>$header</td></tr></thead>";
   4130             } else {
   4131                 $header = "<div$attr>$header</div>";
   4132             }
   4133         }
   4134 
   4135         if (GESHI_HEADER_NONE == $this->header_type) {
   4136             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   4137                 return "$header<ol$attributes$ol_attributes>";
   4138             }
   4139             return $header . ($this->force_code_block ? '<div>' : '');
   4140         }
   4141 
   4142         // Work out what to return and do it
   4143         if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   4144             if ($this->header_type == GESHI_HEADER_PRE) {
   4145                 return "<pre$attributes>$header<ol$ol_attributes>";
   4146             } elseif ($this->header_type == GESHI_HEADER_DIV ||
   4147                 $this->header_type == GESHI_HEADER_PRE_VALID) {
   4148                 return "<div$attributes>$header<ol$ol_attributes>";
   4149             } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
   4150                 return "<table$attributes>$header<tbody><tr class=\"li1\">";
   4151             }
   4152         } else {
   4153             if ($this->header_type == GESHI_HEADER_PRE) {
   4154                 return "<pre$attributes>$header"  .
   4155                     ($this->force_code_block ? '<div>' : '');
   4156             } else {
   4157                 return "<div$attributes>$header" .
   4158                     ($this->force_code_block ? '<div>' : '');
   4159             }
   4160         }
   4161     }
   4162 
   4163     /**
   4164      * Returns the footer for the code block.
   4165      *
   4166      * @return string The footer for the code block
   4167      * @since  1.0.0
   4168      * @access private
   4169      */
   4170     function footer() {
   4171         $footer = $this->footer_content;
   4172         if ($footer) {
   4173             if ($this->header_type == GESHI_HEADER_PRE) {
   4174                 $footer = str_replace("\n", '', $footer);;
   4175             }
   4176             $footer = $this->replace_keywords($footer);
   4177 
   4178             if ($this->use_classes) {
   4179                 $attr = ' class="foot"';
   4180             } else {
   4181                 $attr = " style=\"{$this->footer_content_style}\"";
   4182             }
   4183             if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   4184                 $footer = "<tfoot><tr><td colspan=\"2\">$footer</td></tr></tfoot>";
   4185             } else {
   4186                 $footer = "<div$attr>$footer</div>";
   4187             }
   4188         }
   4189 
   4190         if (GESHI_HEADER_NONE == $this->header_type) {
   4191             return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
   4192         }
   4193 
   4194         if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
   4195             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   4196                 return "</ol>$footer</div>";
   4197             }
   4198             return ($this->force_code_block ? '</div>' : '') .
   4199                 "$footer</div>";
   4200         }
   4201         elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
   4202             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   4203                 return "</tr></tbody>$footer</table>";
   4204             }
   4205             return ($this->force_code_block ? '</div>' : '') .
   4206                 "$footer</div>";
   4207         }
   4208         else {
   4209             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   4210                 return "</ol>$footer</pre>";
   4211             }
   4212             return ($this->force_code_block ? '</div>' : '') .
   4213                 "$footer</pre>";
   4214         }
   4215     }
   4216 
   4217     /**
   4218      * Replaces certain keywords in the header and footer with
   4219      * certain configuration values
   4220      *
   4221      * @param  string The header or footer content to do replacement on
   4222      * @return string The header or footer with replaced keywords
   4223      * @since  1.0.2
   4224      * @access private
   4225      */
   4226     function replace_keywords($instr) {
   4227         $keywords = $replacements = array();
   4228 
   4229         $keywords[] = '<TIME>';
   4230         $keywords[] = '{TIME}';
   4231         $replacements[] = $replacements[] = number_format($time = $this->get_time(), 3);
   4232 
   4233         $keywords[] = '<LANGUAGE>';
   4234         $keywords[] = '{LANGUAGE}';
   4235         $replacements[] = $replacements[] = $this->language_data['LANG_NAME'];
   4236 
   4237         $keywords[] = '<VERSION>';
   4238         $keywords[] = '{VERSION}';
   4239         $replacements[] = $replacements[] = GESHI_VERSION;
   4240 
   4241         $keywords[] = '<SPEED>';
   4242         $keywords[] = '{SPEED}';
   4243         if ($time <= 0) {
   4244             $speed = 'N/A';
   4245         } else {
   4246             $speed = strlen($this->source) / $time;
   4247             if ($speed >= 1024) {
   4248                 $speed = sprintf("%.2f KB/s", $speed / 1024.0);
   4249             } else {
   4250                 $speed = sprintf("%.0f B/s", $speed);
   4251             }
   4252         }
   4253         $replacements[] = $replacements[] = $speed;
   4254 
   4255         return str_replace($keywords, $replacements, $instr);
   4256     }
   4257 
   4258     /**
   4259      * Secure replacement for PHP built-in function htmlspecialchars().
   4260      *
   4261      * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
   4262      * for this replacement function.
   4263      *
   4264      * The INTERFACE for this function is almost the same as that for
   4265      * htmlspecialchars(), with the same default for quote style; however, there
   4266      * is no 'charset' parameter. The reason for this is as follows:
   4267      *
   4268      * The PHP docs say:
   4269      *      "The third argument charset defines character set used in conversion."
   4270      *
   4271      * I suspect PHP's htmlspecialchars() is working at the byte-value level and
   4272      * thus _needs_ to know (or asssume) a character set because the special
   4273      * characters to be replaced could exist at different code points in
   4274      * different character sets. (If indeed htmlspecialchars() works at
   4275      * byte-value level that goes some  way towards explaining why the
   4276      * vulnerability would exist in this function, too, and not only in
   4277      * htmlentities() which certainly is working at byte-value level.)
   4278      *
   4279      * This replacement function however works at character level and should
   4280      * therefore be "immune" to character set differences - so no charset
   4281      * parameter is needed or provided. If a third parameter is passed, it will
   4282      * be silently ignored.
   4283      *
   4284      * In the OUTPUT there is a minor difference in that we use '&#39;' instead
   4285      * of PHP's '&#039;' for a single quote: this provides compatibility with
   4286      *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
   4287      * (see comment by mikiwoz at yahoo dot co dot uk on
   4288      * http://php.net/htmlspecialchars); it also matches the entity definition
   4289      * for XML 1.0
   4290      * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
   4291      * Like PHP we use a numeric character reference instead of '&apos;' for the
   4292      * single quote. For the other special characters we use the named entity
   4293      * references, as PHP is doing.
   4294      *
   4295      * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
   4296      *
   4297      * @license     http://www.gnu.org/copyleft/lgpl.html
   4298      *              GNU Lesser General Public License
   4299      * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
   4300      *              Wikka Development Team}
   4301      *
   4302      * @access      private
   4303      * @param       string  $string string to be converted
   4304      * @param       integer $quote_style
   4305      *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
   4306      *                      - ENT_NOQUOTES: escapes only &, < and >
   4307      *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
   4308      * @return      string  converted string
   4309      * @since       1.0.7.18
   4310      */
   4311     function hsc($string, $quote_style = ENT_COMPAT) {
   4312         // init
   4313         static $aTransSpecchar = array(
   4314             '&' => '&amp;',
   4315             '"' => '&quot;',
   4316             '<' => '&lt;',
   4317             '>' => '&gt;',
   4318 
   4319             //This fix is related to SF#1923020, but has to be applied
   4320             //regardless of actually highlighting symbols.
   4321 
   4322             //Circumvent a bug with symbol highlighting
   4323             //This is required as ; would produce undesirable side-effects if it
   4324             //was not to be processed as an entity.
   4325             ';' => '<SEMI>', // Force ; to be processed as entity
   4326             '|' => '<PIPE>' // Force | to be processed as entity
   4327             );                      // ENT_COMPAT set
   4328 
   4329         switch ($quote_style) {
   4330             case ENT_NOQUOTES: // don't convert double quotes
   4331                 unset($aTransSpecchar['"']);
   4332                 break;
   4333             case ENT_QUOTES: // convert single quotes as well
   4334                 $aTransSpecchar["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
   4335                 break;
   4336         }
   4337 
   4338         // return translated string
   4339         return strtr($string, $aTransSpecchar);
   4340     }
   4341 
   4342     function _genCSSName($name){
   4343         return (is_numeric($name[0]) ? '_' : '') . $name;
   4344     }
   4345 
   4346     /**
   4347      * Returns a stylesheet for the highlighted code. If $economy mode
   4348      * is true, we only return the stylesheet declarations that matter for
   4349      * this code block instead of the whole thing
   4350      *
   4351      * @param  boolean Whether to use economy mode or not
   4352      * @return string A stylesheet built on the data for the current language
   4353      * @since  1.0.0
   4354      */
   4355     function get_stylesheet($economy_mode = true) {
   4356         // If there's an error, chances are that the language file
   4357         // won't have populated the language data file, so we can't
   4358         // risk getting a stylesheet...
   4359         if ($this->error) {
   4360             return '';
   4361         }
   4362 
   4363         //Check if the style rearrangements have been processed ...
   4364         //This also does some preprocessing to check which style groups are useable ...
   4365         if(!isset($this->language_data['NUMBERS_CACHE'])) {
   4366             $this->build_style_cache();
   4367         }
   4368 
   4369         // First, work out what the selector should be. If there's an ID,
   4370         // that should be used, the same for a class. Otherwise, a selector
   4371         // of '' means that these styles will be applied anywhere
   4372         if ($this->overall_id) {
   4373             $selector = '#' . $this->_genCSSName($this->overall_id);
   4374         } else {
   4375             $selector = '.' . $this->_genCSSName($this->language);
   4376             if ($this->overall_class) {
   4377                 $selector .= '.' . $this->_genCSSName($this->overall_class);
   4378             }
   4379         }
   4380         $selector .= ' ';
   4381 
   4382         // Header of the stylesheet
   4383         if (!$economy_mode) {
   4384             $stylesheet = "/**\n".
   4385                 " * GeSHi Dynamically Generated Stylesheet\n".
   4386                 " * --------------------------------------\n".
   4387                 " * Dynamically generated stylesheet for {$this->language}\n".
   4388                 " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n".
   4389                 " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
   4390                 " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
   4391                 " * --------------------------------------\n".
   4392                 " */\n";
   4393         } else {
   4394             $stylesheet = "/**\n".
   4395                 " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
   4396                 " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
   4397                 " */\n";
   4398         }
   4399 
   4400         // Set the <ol> to have no effect at all if there are line numbers
   4401         // (<ol>s have margins that should be destroyed so all layout is
   4402         // controlled by the set_overall_style method, which works on the
   4403         // <pre> or <div> container). Additionally, set default styles for lines
   4404         if (!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
   4405             //$stylesheet .= "$selector, {$selector}ol, {$selector}ol li {margin: 0;}\n";
   4406             $stylesheet .= "$selector.de1, $selector.de2 {{$this->code_style}}\n";
   4407         }
   4408 
   4409         // Add overall styles
   4410         // note: neglect economy_mode, empty styles are meaningless
   4411         if ($this->overall_style != '') {
   4412             $stylesheet .= "$selector {{$this->overall_style}}\n";
   4413         }
   4414 
   4415         // Add styles for links
   4416         // note: economy mode does not make _any_ sense here
   4417         //       either the style is empty and thus no selector is needed
   4418         //       or the appropriate key is given.
   4419         foreach ($this->link_styles as $key => $style) {
   4420             if ($style != '') {
   4421                 switch ($key) {
   4422                     case GESHI_LINK:
   4423                         $stylesheet .= "{$selector}a:link {{$style}}\n";
   4424                         break;
   4425                     case GESHI_HOVER:
   4426                         $stylesheet .= "{$selector}a:hover {{$style}}\n";
   4427                         break;
   4428                     case GESHI_ACTIVE:
   4429                         $stylesheet .= "{$selector}a:active {{$style}}\n";
   4430                         break;
   4431                     case GESHI_VISITED:
   4432                         $stylesheet .= "{$selector}a:visited {{$style}}\n";
   4433                         break;
   4434                 }
   4435             }
   4436         }
   4437 
   4438         // Header and footer
   4439         // note: neglect economy_mode, empty styles are meaningless
   4440         if ($this->header_content_style != '') {
   4441             $stylesheet .= "$selector.head {{$this->header_content_style}}\n";
   4442         }
   4443         if ($this->footer_content_style != '') {
   4444             $stylesheet .= "$selector.foot {{$this->footer_content_style}}\n";
   4445         }
   4446 
   4447         // Styles for important stuff
   4448         // note: neglect economy_mode, empty styles are meaningless
   4449         if ($this->important_styles != '') {
   4450             $stylesheet .= "$selector.imp {{$this->important_styles}}\n";
   4451         }
   4452 
   4453         // Simple line number styles
   4454         if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->line_style1 != '') {
   4455             $stylesheet .= "{$selector}li, {$selector}.li1 {{$this->line_style1}}\n";
   4456         }
   4457         if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->table_linenumber_style != '') {
   4458             $stylesheet .= "{$selector}.ln {{$this->table_linenumber_style}}\n";
   4459         }
   4460         // If there is a style set for fancy line numbers, echo it out
   4461         if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
   4462             $stylesheet .= "{$selector}.li2 {{$this->line_style2}}\n";
   4463         }
   4464 
   4465         // note: empty styles are meaningless
   4466         foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
   4467             if ($styles != '' && (!$economy_mode ||
   4468                 (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
   4469                 $this->lexic_permissions['KEYWORDS'][$group]))) {
   4470                 $stylesheet .= "$selector.kw$group {{$styles}}\n";
   4471             }
   4472         }
   4473         foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
   4474             if ($styles != '' && (!$economy_mode ||
   4475                 (isset($this->lexic_permissions['COMMENTS'][$group]) &&
   4476                 $this->lexic_permissions['COMMENTS'][$group]) ||
   4477                 (!empty($this->language_data['COMMENT_REGEXP']) &&
   4478                 !empty($this->language_data['COMMENT_REGEXP'][$group])))) {
   4479                 $stylesheet .= "$selector.co$group {{$styles}}\n";
   4480             }
   4481         }
   4482         foreach ($this->language_data['STYLES']['ESCAPE_CHAR'] as $group => $styles) {
   4483             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['ESCAPE_CHAR'])) {
   4484                 // NEW: since 1.0.8 we have to handle hardescapes
   4485                 if ($group === 'HARD') {
   4486                     $group = '_h';
   4487                 }
   4488                 $stylesheet .= "$selector.es$group {{$styles}}\n";
   4489             }
   4490         }
   4491         foreach ($this->language_data['STYLES']['BRACKETS'] as $group => $styles) {
   4492             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['BRACKETS'])) {
   4493                 $stylesheet .= "$selector.br$group {{$styles}}\n";
   4494             }
   4495         }
   4496         foreach ($this->language_data['STYLES']['SYMBOLS'] as $group => $styles) {
   4497             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['SYMBOLS'])) {
   4498                 $stylesheet .= "$selector.sy$group {{$styles}}\n";
   4499             }
   4500         }
   4501         foreach ($this->language_data['STYLES']['STRINGS'] as $group => $styles) {
   4502             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['STRINGS'])) {
   4503                 // NEW: since 1.0.8 we have to handle hardquotes
   4504                 if ($group === 'HARD') {
   4505                     $group = '_h';
   4506                 }
   4507                 $stylesheet .= "$selector.st$group {{$styles}}\n";
   4508             }
   4509         }
   4510         foreach ($this->language_data['STYLES']['NUMBERS'] as $group => $styles) {
   4511             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['NUMBERS'])) {
   4512                 $stylesheet .= "$selector.nu$group {{$styles}}\n";
   4513             }
   4514         }
   4515         foreach ($this->language_data['STYLES']['METHODS'] as $group => $styles) {
   4516             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['METHODS'])) {
   4517                 $stylesheet .= "$selector.me$group {{$styles}}\n";
   4518             }
   4519         }
   4520         // note: neglect economy_mode, empty styles are meaningless
   4521         foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
   4522             if ($styles != '') {
   4523                 $stylesheet .= "$selector.sc$group {{$styles}}\n";
   4524             }
   4525         }
   4526         foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
   4527             if ($styles != '' && (!$economy_mode ||
   4528                 (isset($this->lexic_permissions['REGEXPS'][$group]) &&
   4529                 $this->lexic_permissions['REGEXPS'][$group]))) {
   4530                 if (is_array($this->language_data['REGEXPS'][$group]) &&
   4531                     array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$group])) {
   4532                     $stylesheet .= "$selector.";
   4533                     $stylesheet .= $this->language_data['REGEXPS'][$group][GESHI_CLASS];
   4534                     $stylesheet .= " {{$styles}}\n";
   4535                 } else {
   4536                     $stylesheet .= "$selector.re$group {{$styles}}\n";
   4537                 }
   4538             }
   4539         }
   4540         // Styles for lines being highlighted extra
   4541         if (!$economy_mode || (count($this->highlight_extra_lines)!=count($this->highlight_extra_lines_styles))) {
   4542             $stylesheet .= "{$selector}.ln-xtra, {$selector}li.ln-xtra, {$selector}div.ln-xtra {{$this->highlight_extra_lines_style}}\n";
   4543         }
   4544         $stylesheet .= "{$selector}span.xtra { display:block; }\n";
   4545         foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
   4546             $stylesheet .= "{$selector}.lx$lineid, {$selector}li.lx$lineid, {$selector}div.lx$lineid {{$linestyle}}\n";
   4547         }
   4548 
   4549         return $stylesheet;
   4550     }
   4551 
   /**
    * Returns the style to use for a line that is highlighted "extra".
    *
    * A style registered for this specific line wins; when none is registered
    * the shared extra-line style is used instead.
    *
    * @param int $line The line number the style is requested for
    * @return string The style for that line
    * @access private
    * @since 1.0.7.21
    */
   function get_line_style($line) {
       // A per-line entry overrides the general extra-line style
       if (isset($this->highlight_extra_lines_styles[$line])) {
           return $this->highlight_extra_lines_styles[$line];
       }

       return $this->highlight_extra_lines_style;
   }
   4570 
   /**
    * Creates an optimized list of regular expressions from an array of strings.
    *
    * The strings are sorted, quoted with preg_quote() and merged into a prefix
    * tree so that common leading parts are only written once. The result is
    * split over several patterns when a single one would exceed
    * GESHI_MAX_PCRE_LENGTH or GESHI_MAX_PCRE_SUBPATTERNS.
    *
    * Example:
    * <code>$list = array('faa', 'foo', 'foobar');
    *          => string 'f(aa|oo(bar)?)'</code>
    *
    * @param array  $list             array of (unquoted) strings
    * @param string $regexp_delimiter your regular expression delimiter, @see preg_quote()
    * @return array list of regular expression strings (always at least one
    *               element; it is the empty string for an empty $list)
    * @author Milian Wolff <mail@milianw.de>
    * @since 1.0.8
    * @access private
    */
   function optimize_regexp_list($list, $regexp_delimiter = '/') {
       $regex_chars = array('.', '\\', '+', '-', '*', '?', '[', '^', ']', '$',
           '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
       sort($list);
       $regexp_list = array('');
       $num_subpatterns = 0;
       $list_key = 0;

       // the tokens which we will use to generate the regexp list
       $tokens = array();
       $prev_keys = array();
       // go through all entries of the list and generate the token list
       $cur_len = 0;
       for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
           if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
               // seems like the length of this pcre is growing exorbitantly;
               // start a new pattern, but reuse the current slot while it is
               // still empty so no empty pattern is left behind in the list
               if ($regexp_list[$list_key] !== '') {
                   ++$list_key;
               }
               $regexp_list[$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
               $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
               $tokens = array();
               $cur_len = 0;
           }
           $level = 0;
           $entry = preg_quote((string) $list[$i], $regexp_delimiter);
           $pointer = &$tokens;
           // properly assign the new entry to the correct position in the token array
           // possibly generate smaller common denominator keys
           while (true) {
               // get the common denominator
               if (isset($prev_keys[$level])) {
                   // strict comparison: with == numeric strings such as
                   // '1000' and '1e3' compare equal and would be dropped
                   if ($prev_keys[$level] === $entry) {
                       // this is a duplicate entry, skip it
                       continue 2;
                   }
                   $char = 0;
                   while (isset($entry[$char]) && isset($prev_keys[$level][$char])
                           && $entry[$char] === $prev_keys[$level][$char]) {
                       ++$char;
                   }
                   if ($char > 0) {
                       // this entry has at least some chars in common with the current key
                       if ($char === strlen($prev_keys[$level])) {
                           // current key is totally matched, i.e. this entry has just some bits appended
                           $pointer = &$pointer[$prev_keys[$level]];
                       } else {
                           // only part of the keys match
                           $new_key_part1 = substr($prev_keys[$level], 0, $char);
                           $new_key_part2 = substr($prev_keys[$level], $char);

                           // an odd number of trailing backslashes means the split point
                           // lies between an escaping backslash and the character it
                           // escapes; this also covers characters preg_quote() escapes
                           // that are not listed in $regex_chars (e.g. '#')
                           $trailing_backslashes = strlen($new_key_part1)
                               - strlen(rtrim($new_key_part1, '\\'));

                           if ($trailing_backslashes % 2 === 1
                               || in_array($new_key_part1[0], $regex_chars, true)
                               || in_array($new_key_part2[0], $regex_chars, true)) {
                               // this is bad, splitting here would break an escape sequence
                               // or put a regex char first: store the entry unsplit
                               $pointer[$entry] = array('' => true);
                               array_splice($prev_keys, $level, count($prev_keys), $entry);
                               $cur_len += strlen($entry);
                               // the entry is stored, go on with the next list item
                               continue 2;
                           } else {
                               // relocate previous tokens
                               $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
                               unset($pointer[$prev_keys[$level]]);
                               $pointer = &$pointer[$new_key_part1];
                               // recreate key index
                               array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
                               $cur_len += strlen($new_key_part2);
                           }
                       }
                       ++$level;
                       $entry = substr($entry, $char);
                       continue;
                   }
                   // else: fall through, i.e. no common denominator was found
               }
               if ($level === 0 && !empty($tokens)) {
                   // we can dump current tokens into the string and throw them away afterwards
                   $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
                   $new_subpatterns = substr_count($new_entry, '(?:');
                   if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
                       // too many subpatterns for one pattern: continue in a new
                       // slot (or in the current one if nothing was stored yet)
                       if ($regexp_list[$list_key] !== '') {
                           ++$list_key;
                       }
                       $regexp_list[$list_key] = $new_entry;
                       $num_subpatterns = $new_subpatterns;
                   } else {
                       // compare against '' explicitly: empty() would also treat
                       // the pattern '0' as empty and drop the separator
                       if ($regexp_list[$list_key] !== '') {
                           $new_entry = '|' . $new_entry;
                       }
                       $regexp_list[$list_key] .= $new_entry;
                       $num_subpatterns += $new_subpatterns;
                   }
                   $tokens = array();
                   $cur_len = 0;
               }
               // no further common denominator found
               $pointer[$entry] = array('' => true);
               array_splice($prev_keys, $level, count($prev_keys), $entry);

               $cur_len += strlen($entry);
               break;
           }
           // the entry has been consumed, free it
           unset($list[$i]);
       }
       // make sure the last tokens get converted as well
       $new_entry = (string) $this->_optimize_regexp_list_tokens_to_string($tokens);
       if ($new_entry === '') {
           // nothing left to add; appending would only create an empty alternative
           return $regexp_list;
       }
       if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
           if ($regexp_list[$list_key] !== '') {
               ++$list_key;
           }
           $regexp_list[$list_key] = $new_entry;
       } else {
           if ($regexp_list[$list_key] !== '') {
               $new_entry = '|' . $new_entry;
           }
           $regexp_list[$list_key] .= $new_entry;
       }
       return $regexp_list;
   }
   /**
    * Creates the regexp string for a token array.
    * You should not call this function directly, @see $this->optimize_regexp_list().
    *
    * Each key of $tokens is a (already quoted) piece of pattern text and each
    * value is the array of tokens that may follow it. An entry with the key ''
    * marks that the text up to this point is a complete alternative of its own.
    *
    * @param array &$tokens  array of tokens
    * @param bool  $recursed whether this is a recursive call; the final
    *                        clean-up passes only run on the outermost call
    * @return string the alternation, without a trailing pipe
    * @author Milian Wolff <mail@milianw.de>
    * @since 1.0.8
    * @access private
    */
   function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
       $list = '';
       foreach ($tokens as $token => $sub_tokens) {
           $list .= $token;
           // '' present => the token on its own is a valid alternative
           $close_entry = isset($sub_tokens['']);
           unset($sub_tokens['']);
           if (!empty($sub_tokens)) {
               $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
               if ($close_entry) {
                   // make sub_tokens optional
                   $list .= '?';
               }
           }
           $list .= '|';
       }
       if (!$recursed) {
           // do some optimizations
           // note: merging common trailing strings is deliberately not done,
           //       the former implementation of it was buggy
           // (?:p)? => p?
           $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
           // (?:a|b|c|d|...) => [abcd...]
           // TODO: a|bb|c => [ac]|bb
           static $callback_2;
           if (!isset($callback_2)) {
               $callback_2 = function( $matches ) {
                   return "[" . str_replace("|", "", $matches[1]) . "]";
               };
           }
           // Only plain single characters may be folded into a character class.
           // A backslash belongs to an escape sequence (e.g. "\|" or "\)") and
           // unescaped "|", "(" and ")" are structure; letting "." match them
           // folded groups such as (?:a|\|) together with whatever followed
           // into a malformed class.
           $list = preg_replace_callback('#\(\?\:((?:[^\\\\|()\n]\|)+[^\\\\|()\n])\)#', $callback_2, $list);
       }
       // return $list without trailing pipe
       return substr($list, 0, -1);
   }
   4745 } // End Class GeSHi
   4746 
   4747 
   if (!function_exists('geshi_highlight')) {
       /**
        * Shortcut for highlighting a piece of code in one call, in the manner
        * of highlight_string. The output is produced without a GeSHi header
        * and wrapped in a <code> element.
        *
        * @param string  $string   The code to highlight
        * @param string  $language The language to highlight the code in
        * @param string  $path     The path to the language files. You can leave this blank;
        *                          as from version 1.0.7 the path should be automatically detected
        * @param boolean $return   Whether to return the result instead of echoing it
        * @return string|boolean The highlighted code if $return is true; otherwise
        *                        false if GeSHi reports an error after echoing, true if not
        * @since 1.0.2
        */
       function geshi_highlight($string, $language, $path = null, $return = false) {
           $highlighter = new GeSHi($string, $language, $path);
           $highlighter->set_header_type(GESHI_HEADER_NONE);

           $markup = '<code>' . $highlighter->parse_code() . '</code>';
           if ($return) {
               return $markup;
           }

           echo $markup;

           // report success as the absence of an error
           return !$highlighter->error();
       }
   }
   4776 
   4777 ?>