File <code>geshi/geshi.php</code>

File geshi/geshi.php
Last commit: Sun Dec 9 23:32:58 2018 +0100	Jan Dankert	Fix: Geshi PHP7-fähig

     1 <?php
     2 /**
     3  * GeSHi - Generic Syntax Highlighter
     4  *
     5  * The GeSHi class for Generic Syntax Highlighting. Please refer to the
     6  * documentation at http://qbnz.com/highlighter/documentation.php for more
     7  * information about how to use this class.
     8  *
     9  * For changes, release notes, TODOs etc, see the relevant files in the docs/
    10  * directory.
    11  *
    12  *   This file is part of GeSHi.
    13  *
    14  *  GeSHi is free software; you can redistribute it and/or modify
    15  *  it under the terms of the GNU General Public License as published by
    16  *  the Free Software Foundation; either version 2 of the License, or
    17  *  (at your option) any later version.
    18  *
    19  *  GeSHi is distributed in the hope that it will be useful,
    20  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
    21  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    22  *  GNU General Public License for more details.
    23  *
    24  *  You should have received a copy of the GNU General Public License
    25  *  along with GeSHi; if not, write to the Free Software
    26  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    27  *
    28  * @package    geshi
    29  * @subpackage core
    30  * @author     Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
    31  * @copyright  (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
    32  * @license    http://gnu.org/copyleft/gpl.html GNU GPL
    33  *
    34  */
    35 
    36 //
    37 // GeSHi Constants
    38 // You should use these constant names in your programs instead of
    39 // their values - you never know when a value may change in a future
    40 // version
    41 //
    42 
    43 /** The version of this GeSHi file, as returned by GeSHi->get_version() */
    44 define('GESHI_VERSION', '1.0.8.11');
    45 
    46 // Define the root directory for the GeSHi code tree, unless the including script already defined it
    47 if (!defined('GESHI_ROOT')) {
    48     /** The root directory for GeSHi: the directory holding this file, with a trailing separator */
    49     define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
    50 }
    51 /** The language file directory for GeSHi: the "geshi" directory below GESHI_ROOT
    52     @access private */
    53 define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
    54 
    55 // Define if GeSHi should be paranoid about security (off unless the including script defined it)
    56 if (!defined('GESHI_SECURITY_PARANOID')) {
    57     /** Tells GeSHi to be paranoid about security settings; when true, set_language_path() also rejects paths containing "/." or ".." */
    58     define('GESHI_SECURITY_PARANOID', false);
    59 }
    60 
    61 // Line numbers - use with enable_line_numbers()
    62 /** Use no line numbers when building the result */
    63 define('GESHI_NO_LINE_NUMBERS', 0);
    64 /** Use normal line numbers when building the result */
    65 define('GESHI_NORMAL_LINE_NUMBERS', 1);
    66 /** Use fancy line numbers when building the result */
    67 define('GESHI_FANCY_LINE_NUMBERS', 2);
    68 
    69 // Container HTML type - use with set_header_type()
    70 /** Use nothing to surround the source */
    71 define('GESHI_HEADER_NONE', 0);
    72 /** Use a "div" to surround the source */
    73 define('GESHI_HEADER_DIV', 1);
    74 /** Use a "pre" to surround the source */
    75 define('GESHI_HEADER_PRE', 2);
    76 /** Use a "pre" to wrap lines when line numbers are enabled or to wrap the whole code. */
    77 define('GESHI_HEADER_PRE_VALID', 3);
    78 /**
    79  * Use a "table" to surround the source:
    80  *
    81  *  <table>
    82  *    <thead><tr><td colspan="2">$header</td></tr></thead>
    83  *    <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
    84  *    <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
    85  *  </table>
    86  *
    87  * This is essentially only a workaround for Firefox, see sf#1651996 or take a look at
    88  * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
    89  * @note when line numbers are disabled this is essentially the same as GESHI_HEADER_PRE
    90  */
    91 define('GESHI_HEADER_PRE_TABLE', 4);
    92 
    93 // Capitalisation constants
    94 /** Leave keywords found as the case that they are */
    95 define('GESHI_CAPS_NO_CHANGE', 0);
    96 /** Uppercase keywords found */
    97 define('GESHI_CAPS_UPPER', 1);
    98 /** Lowercase keywords found */
    99 define('GESHI_CAPS_LOWER', 2);
   100 
   101 // Link style constants
   102 /** Links in the source in the :link state */
   103 define('GESHI_LINK', 0);
   104 /** Links in the source in the :hover state */
   105 define('GESHI_HOVER', 1);
   106 /** Links in the source in the :active state */
   107 define('GESHI_ACTIVE', 2);
   108 /** Links in the source in the :visited state */
   109 define('GESHI_VISITED', 3);
   110 
   111 // Important string starter/finisher
   112 // Note that if you change these, write them as-is: i.e., don't
   113 // write them as if they had been run through htmlentities()
   114 /** The marker that starts an important part of the source */
   115 define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
   116 /** The marker that ends an important part of the source */
   117 define('GESHI_END_IMPORTANT', '<END GeSHi>');
   118 
   119 /**#@+
   120  *  @access private
   121  */
   122 // When strict mode applies for a language
   123 /** Strict mode never applies (this is the most common) */
   124 define('GESHI_NEVER', 0);
   125 /** Strict mode *might* apply, and can be enabled or
   126     disabled by {@link GeSHi->enable_strict_mode()} */
   127 define('GESHI_MAYBE', 1);
   128 /** Strict mode always applies */
   129 define('GESHI_ALWAYS', 2);
   130 
   131 // Advanced regexp handling constants, used as array keys in language files
   132 /** The key of the regex array defining what to search for */
   133 define('GESHI_SEARCH', 0);
   134 /** The key of the regex array defining what bracket group in a
   135     matched search to use as a replacement */
   136 define('GESHI_REPLACE', 1);
   137 /** The key of the regex array defining any modifiers to the regular expression */
   138 define('GESHI_MODIFIERS', 2);
   139 /** The key of the regex array defining what bracket group in a
   140     matched search to put before the replacement */
   141 define('GESHI_BEFORE', 3);
   142 /** The key of the regex array defining what bracket group in a
   143     matched search to put after the replacement */
   144 define('GESHI_AFTER', 4);
   145 /** The key of the regex array defining a custom keyword to use
   146     for this regexp's html tag class */
   147 define('GESHI_CLASS', 5);
   148 
   149 /** Used in language files to mark comments */
   150 define('GESHI_COMMENTS', 0);
   151 
   152 /** Used to work around missing PHP features: true unless PHP_VERSION is greater than 4.3.3 **/
   153 define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION, '4.3.3') === 1));
   154 
   155 /** make sure we can call stripos **/
   156 if (!function_exists('stripos')) {
   157     // the offset param of preg_match is not supported below PHP 4.3.3
   158     if (GESHI_PHP_PRE_433) {
   159         /**
   160          * @ignore
   161          */
   162         function stripos($haystack, $needle, $offset = null) {
   163             if (!is_null($offset)) {
   164                 $haystack = substr($haystack, $offset);
   165             }
   166             if (preg_match('/'. preg_quote($needle, '/') . '/', $haystack, $match, PREG_OFFSET_CAPTURE)) {
   167                 return $match[0][1];
   168             }
   169             return false;
   170         }
   171     }
   172     else {
   173         /**
   174          * @ignore
   175          */
   176         function stripos($haystack, $needle, $offset = null) {
   177             if (preg_match('/'. preg_quote($needle, '/') . '/', $haystack, $match, PREG_OFFSET_CAPTURE, $offset)) {
   178                 return $match[0][1];
   179             }
   180             return false;
   181         }
   182     }
   183 }
   184 
   185 /** Some old PHP / PCRE versions only support a limited number of subpatterns in
   186     regular expressions. Set this to false if your PCRE lib is up to date
   187     @see GeSHi->optimize_regexp_list()
   188     **/
   189 define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
   190 /** It is also important not to generate overly long regular expressions.
   191     Be generous here, but keep in mind that when reaching this limit we
   192     still have to close open patterns. 12k should do just fine on a 16k limit.
   193     @see GeSHi->optimize_regexp_list()
   194     **/
   195 define('GESHI_MAX_PCRE_LENGTH', 12288);
   196 
   197 //Number format specification (each value is a distinct bit)
   198 /** Basic number format for integers */
   199 define('GESHI_NUMBER_INT_BASIC', 1);        //Default integers \d+
   200 /** Enhanced number format for integers like seen in C */
   201 define('GESHI_NUMBER_INT_CSTYLE', 2);       //Default C-Style \d+[lL]?
   202 /** Number format to highlight binary numbers with a suffix "b" */
   203 define('GESHI_NUMBER_BIN_SUFFIX', 16);           //[01]+[bB]
   204 /** Number format to highlight binary numbers with a prefix % */
   205 define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32);   //%[01]+
   206 /** Number format to highlight binary numbers with a prefix 0b (C) */
   207 define('GESHI_NUMBER_BIN_PREFIX_0B', 64);        //0b[01]+
   208 /** Number format to highlight octal numbers with a leading zero */
   209 define('GESHI_NUMBER_OCT_PREFIX', 256);           //0[0-7]+
   210 /** Number format to highlight octal numbers with a prefix 0o (logtalk) */
   211 define('GESHI_NUMBER_OCT_PREFIX_0O', 512);           //0o[0-7]+
   212 /** Number format to highlight octal numbers with a leading @ (Used in HiSofts Devpac series). */
   213 define('GESHI_NUMBER_OCT_PREFIX_AT', 1024);           //@[0-7]+
   214 /** Number format to highlight octal numbers with a suffix of o */
   215 define('GESHI_NUMBER_OCT_SUFFIX', 2048);           //[0-7]+[oO]
   216 /** Number format to highlight hex numbers with a prefix 0x */
   217 define('GESHI_NUMBER_HEX_PREFIX', 4096);           //0x[0-9a-fA-F]+
   218 /** Number format to highlight hex numbers with a prefix $ */
   219 define('GESHI_NUMBER_HEX_PREFIX_DOLLAR', 8192);           //$[0-9a-fA-F]+
   220 /** Number format to highlight hex numbers with a suffix of h */
   221 define('GESHI_NUMBER_HEX_SUFFIX', 16384);           //[0-9][0-9a-fA-F]*h
   222 /** Number format to highlight floating-point numbers without support for scientific notation */
   223 define('GESHI_NUMBER_FLT_NONSCI', 65536);          //\d+\.\d+
   224 /** Number format to highlight floating-point numbers with a suffix of f and without support for scientific notation */
   225 define('GESHI_NUMBER_FLT_NONSCI_F', 131072);       //\d+(\.\d+)?f
   226 /** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
   227 define('GESHI_NUMBER_FLT_SCI_SHORT', 262144);      //\.\d+e\d+
   228 /** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
   229 define('GESHI_NUMBER_FLT_SCI_ZERO', 524288);       //\d+(\.\d+)?e\d+
   230 //Custom formats are passed by RX array
   231 
   232 // Error detection - use these to analyse faults
   233 /** No source code to highlight was specified
   234  * @deprecated
   235  */
   236 define('GESHI_ERROR_NO_INPUT', 1);
   237 /** The language specified does not exist */
   238 define('GESHI_ERROR_NO_SUCH_LANG', 2);
   239 /** GeSHi could not open a file for reading (generally a language file) */
   240 define('GESHI_ERROR_FILE_NOT_READABLE', 3);
   241 /** The header type passed to {@link GeSHi->set_header_type()} was invalid */
   242 define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
   243 /** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
   244 define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
   245 /**#@-*/
   246 
   247 
   248 /**
   249  * The GeSHi Class.
   250  *
   251  * Please refer to the documentation for GeSHi 1.0.X that is available
   252  * at http://qbnz.com/highlighter/documentation.php for more information
   253  * about how to use this class.
   254  *
   255  * @package   geshi
   256  * @author    Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
   257  * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
   258  */
   259 class GeSHi {
   260     /**#@+
   261      * @access private
   262      */
   263     /**
   264      * The source code to highlight
   265      * @var string
   266      */
   267     var $source = '';
   268 
   269     /**
   270      * The language to use when highlighting
   271      * @var string
   272      */
   273     var $language = '';
   274 
   275     /**
   276      * The data for the language used
   277      * @var array
   278      */
   279     var $language_data = array();
   280 
   281     /**
   282      * The path to the language files
   283      * @var string
   284      */
   285     var $language_path = GESHI_LANG_ROOT;
   286 
   287     /**
   288      * The error message associated with an error
   289      * @var int|string|false
   290      * @todo check err reporting works
   291      */
   292     var $error = false;
   293 
   294     /**
   295      * Possible error messages
   296      * @var array
   297      */
   298     var $error_messages = array(
   299         GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
   300         GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
   301         GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
   302         GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
   303     );
   304 
   305     /**
   306      * Whether highlighting is strict or not
   307      * @var boolean
   308      */
   309     var $strict_mode = false;
   310 
   311     /**
   312      * Whether to use CSS classes in output
   313      * @var boolean
   314      */
   315     var $use_classes = false;
   316 
   317     /**
   318      * The type of header to use. Can be one of the following
   319      * values:
   320      *
   321      * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
   322      * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
   323      * - GESHI_HEADER_NONE: No header is outputted.
   324      *
   325      * @var int
   326      */
   327     var $header_type = GESHI_HEADER_PRE;
   328 
   329     /**
   330      * Array of permissions for which lexics should be highlighted
   331      * @var array
   332      */
   333     var $lexic_permissions = array(
   334         'KEYWORDS' =>    array(),
   335         'COMMENTS' =>    array('MULTI' => true),
   336         'REGEXPS' =>     array(),
   337         'ESCAPE_CHAR' => true,
   338         'BRACKETS' =>    true,
   339         'SYMBOLS' =>     false,
   340         'STRINGS' =>     true,
   341         'NUMBERS' =>     true,
   342         'METHODS' =>     true,
   343         'SCRIPT' =>      true
   344     );
   345 
   346     /**
   347      * The time it took to parse the code
   348      * @var double
   349      */
   350     var $time = 0;
   351 
   352     /**
   353      * The content of the header block
   354      * @var string
   355      */
   356     var $header_content = '';
   357 
   358     /**
   359      * The content of the footer block
   360      * @var string
   361      */
   362     var $footer_content = '';
   363 
   364     /**
   365      * The style of the header block
   366      * @var string
   367      */
   368     var $header_content_style = '';
   369 
   370     /**
   371      * The style of the footer block
   372      * @var string
   373      */
   374     var $footer_content_style = '';
   375 
   376     /**
   377      * Tells if a block around the highlighted source should be forced
   378      * if not using line numbering
   379      * @var boolean
   380      */
   381     var $force_code_block = false;
   382 
   383     /**
   384      * The styles for hyperlinks in the code
   385      * @var array
   386      */
   387     var $link_styles = array();
   388 
   389     /**
   390      * Whether important blocks should be recognised or not
   391      * @var boolean
   392      * @deprecated
   393      * @todo REMOVE THIS FUNCTIONALITY!
   394      */
   395     var $enable_important_blocks = false;
   396 
   397     /**
   398      * Styles for important parts of the code
   399      * @var string
   400      * @deprecated
   401      * @todo As above - rethink the whole idea of important blocks as it is buggy and
   402      * will be hard to implement in 1.2
   403      */
   404     var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
   405 
   406     /**
   407      * Whether CSS IDs should be added to the code
   408      * @var boolean
   409      */
   410     var $add_ids = false;
   411 
   412     /**
   413      * Lines that should be highlighted extra
   414      * @var array
   415      */
   416     var $highlight_extra_lines = array();
   417 
   418     /**
   419      * Styles of lines that should be highlighted extra
   420      * @var array
   421      */
   422     var $highlight_extra_lines_styles = array();
   423 
   424     /**
   425      * Styles of extra-highlighted lines
   426      * @var string
   427      */
   428     var $highlight_extra_lines_style = 'background-color: #ffc;';
   429 
   430     /**
   431      * The line ending
   432      * If null, nl2br() will be used on the result string.
   433      * Otherwise, all instances of \n will be replaced with $line_ending
   434      * @var string
   435      */
   436     var $line_ending = null;
   437 
   438     /**
   439      * Number at which line numbers should start at
   440      * @var int
   441      */
   442     var $line_numbers_start = 1;
   443 
   444     /**
   445      * The overall style for this code block
   446      * @var string
   447      */
   448     var $overall_style = 'font-family:monospace;';
   449 
   450     /**
   451      *  The style for the actual code
   452      * @var string
   453      */
   454     var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
   455 
   456     /**
   457      * The overall class for this code block
   458      * @var string
   459      */
   460     var $overall_class = '';
   461 
   462     /**
   463      * The overall ID for this code block
   464      * @var string
   465      */
   466     var $overall_id = '';
   467 
   468     /**
   469      * Line number styles
   470      * @var string
   471      */
   472     var $line_style1 = 'font-weight: normal; vertical-align:top;';
   473 
   474     /**
   475      * Line number styles for fancy lines
   476      * @var string
   477      */
   478     var $line_style2 = 'font-weight: bold; vertical-align:top;';
   479 
   480     /**
   481      * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
   482      * @var string
   483      */
   484     var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
   485 
   486     /**
   487      * Flag for how line numbers are displayed
   488      * @var int
   489      */
   490     var $line_numbers = GESHI_NO_LINE_NUMBERS;
   491 
   492     /**
   493      * Flag to decide if multi line spans are allowed. Set it to false to make sure
   494      * each tag is closed before and reopened after each linefeed.
   495      * @var boolean
   496      */
   497     var $allow_multiline_span = true;
   498 
   499     /**
   500      * The "nth" value for fancy line highlighting
   501      * @var int
   502      */
   503     var $line_nth_row = 0;
   504 
   505     /**
   506      * The size of tab stops
   507      * @var int
   508      */
   509     var $tab_width = 8;
   510 
   511     /**
   512      * Should we use language-defined tab stop widths?
   513      * @var boolean
   514      */
   515     var $use_language_tab_width = false;
   516 
   517     /**
   518      * Default target for keyword links
   519      * @var string
   520      */
   521     var $link_target = '';
   522 
   523     /**
   524      * The encoding to use for entity encoding
   525      * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
   526      * @var string
   527      */
   528     var $encoding = 'utf-8';
   529 
   530     /**
   531      * Should keywords be linked?
   532      * @var boolean
   533      */
   534     var $keyword_links = true;
   535 
   536     /**
   537      * Currently loaded language file
   538      * @var string
   539      * @since 1.0.7.22
   540      */
   541     var $loaded_language = '';
   542 
   543     /**
   544      * Whether the caches needed for parsing are built or not
   545      *
   546      * @var bool
   547      * @since 1.0.8
   548      */
   549     var $parse_cache_built = false;
   550 
   551     /**
   552      * Work around for Suhosin Patch with disabled /e modifier
   553      *
   554      * Note from suhosins author in config file:
   555      * <blockquote>
   556      *   The /e modifier inside <code>preg_replace()</code> allows code execution.
   557      *   Often it is the cause for remote code execution exploits. It is wise to
   558      *   deactivate this feature and test where in the application it is used.
   559      *   The developer using the /e modifier should be made aware that he should
   560      *   use <code>preg_replace_callback()</code> instead
   561      * </blockquote>
   562      *
   563      * @var int
   564      * @since 1.0.8
   565      */
   566     var $_kw_replace_group = 0;
   567     var $_rx_key = 0;
   568 
   569     /**
   570      * some "callback parameters" for handle_multiline_regexps
   571      *
   572      * @since 1.0.8
   573      * @access private
   574      * @var string
   575      */
   576     var $_hmr_before = '';
   577     var $_hmr_replace = '';
   578     var $_hmr_after = '';
   579     var $_hmr_key = 0;
   580 
   581     /**#@-*/
   582 
   583     /**
   584      * Creates a new GeSHi object, with source and language
   585      *
   586      * @param string The source code to highlight
   587      * @param string The language to highlight the source with
   588      * @param string The path to the language file directory. <b>This
   589      *               is deprecated!</b> I've backported the auto path
   590      *               detection from the 1.1.X dev branch, so now it
   591      *               should be automatically set correctly. If you have
   592      *               renamed the language directory however, you will
   593      *               still need to set the path using this parameter or
   594      *               {@link GeSHi->set_language_path()}
   595      * @since 1.0.0
   596      */
   597     function __construct($source = '', $language = '', $path = '') {
   598         if (!empty($source)) {
   599             $this->set_source($source);
   600         }
   601         if (!empty($language)) {
   602             $this->set_language($language);
   603         }
   604         $this->set_language_path($path);
   605     }
   606 
   607     /**
   608      * Returns the version of GeSHi
   609      *
   610      * @return string
   611      * @since 1 0.8.11
   612      */
   613     function get_version()
   614     {
   615         return GESHI_VERSION;
   616     }
   617 
   618     /**
   619      * Returns an error message associated with the last GeSHi operation,
   620      * or false if no error has occured
   621      *
   622      * @return string|false An error message if there has been an error, else false
   623      * @since  1.0.0
   624      */
   625     function error() {
   626         if ($this->error) {
   627             //Put some template variables for debugging here ...
   628             $debug_tpl_vars = array(
   629                 '{LANGUAGE}' => $this->language,
   630                 '{PATH}' => $this->language_path
   631             );
   632             $msg = str_replace(
   633                 array_keys($debug_tpl_vars),
   634                 array_values($debug_tpl_vars),
   635                 $this->error_messages[$this->error]);
   636 
   637             return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
   638         }
   639         return false;
   640     }
   641 
   642     /**
   643      * Gets a human-readable language name (thanks to Simon Patterson
   644      * for the idea :))
   645      *
   646      * @return string The name for the current language
   647      * @since  1.0.2
   648      */
   649     function get_language_name() {
   650         if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
   651             return $this->language_data['LANG_NAME'] . ' (Unknown Language)';
   652         }
   653         return $this->language_data['LANG_NAME'];
   654     }
   655 
   656     /**
   657      * Sets the source code for this object
   658      *
   659      * @param string The source code to highlight
   660      * @since 1.0.0
   661      */
   662     function set_source($source) {
   663         $this->source = $source;
   664         $this->highlight_extra_lines = array();
   665     }
   666 
   667     /**
   668      * Sets the language for this object
   669      *
   670      * @note since 1.0.8 this function won't reset language-settings by default anymore!
   671      *       if you need this set $force_reset = true
   672      *
   673      * @param string The name of the language to use
   674      * @since 1.0.0
   675      */
   676     function set_language($language, $force_reset = false) {
   677         if ($force_reset) {
   678             $this->loaded_language = false;
   679         }
   680 
   681         //Clean up the language name to prevent malicious code injection
   682         $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
   683 
   684         $language = strtolower($language);
   685 
   686         //Retreive the full filename
   687         $file_name = $this->language_path . $language . '.php';
   688         if ($file_name == $this->loaded_language) {
   689             // this language is already loaded!
   690             return;
   691         }
   692 
   693         $this->language = $language;
   694 
   695         $this->error = false;
   696         $this->strict_mode = GESHI_NEVER;
   697 
   698         //Check if we can read the desired file
   699         if (!is_readable($file_name)) {
   700             $this->error = GESHI_ERROR_NO_SUCH_LANG;
   701             return;
   702         }
   703 
   704         // Load the language for parsing
   705         $this->load_language($file_name);
   706     }
   707 
   708     /**
   709      * Sets the path to the directory containing the language files. Note
   710      * that this path is relative to the directory of the script that included
   711      * geshi.php, NOT geshi.php itself.
   712      *
   713      * @param string The path to the language directory
   714      * @since 1.0.0
   715      * @deprecated The path to the language files should now be automatically
   716      *             detected, so this method should no longer be needed. The
   717      *             1.1.X branch handles manual setting of the path differently
   718      *             so this method will disappear in 1.2.0.
   719      */
   720     function set_language_path($path) {
   721         if(strpos($path,':')) {
   722             //Security Fix to prevent external directories using fopen wrappers.
   723             if(DIRECTORY_SEPARATOR == "\\") {
   724                 if(!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
   725                     return;
   726                 }
   727             } else {
   728                 return;
   729             }
   730         }
   731         if(preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
   732             //Security Fix to prevent external directories using fopen wrappers.
   733             return;
   734         }
   735         if(GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
   736             //Security Fix to prevent external directories using fopen wrappers.
   737             return;
   738         }
   739         if(GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
   740             //Security Fix to prevent external directories using fopen wrappers.
   741             return;
   742         }
   743         if ($path) {
   744             $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
   745             $this->set_language($this->language); // otherwise set_language_path has no effect
   746         }
   747     }
   748 
   749     /**
   750      * Get supported langs or an associative array lang=>full_name.
   751      * @param boolean $longnames
   752      * @return array
   753      */
   754     function get_supported_languages($full_names=false)
   755     {
   756         // return array
   757         $back = array();
   758 
   759         // we walk the lang root
   760         $dir = dir($this->language_path);
   761 
   762         // foreach entry
   763         while (false !== ($entry = $dir->read()))
   764         {
   765             $full_path = $this->language_path.$entry;
   766 
   767             // Skip all dirs
   768             if (is_dir($full_path)) {
   769                 continue;
   770             }
   771 
   772             // we only want lang.php files
   773             if (!preg_match('/^([^.]+)\.php$/', $entry, $matches)) {
   774                 continue;
   775             }
   776 
   777             // Raw lang name is here
   778             $langname = $matches[1];
   779 
   780             // We want the fullname too?
   781             if ($full_names === true)
   782             {
   783                 if (false !== ($fullname = $this->get_language_fullname($langname)))
   784                 {
   785                     $back[$langname] = $fullname; // we go associative
   786                 }
   787             }
   788             else
   789             {
   790                 // just store raw langname
   791                 $back[] = $langname;
   792             }
   793         }
   794 
   795         $dir->close();
   796 
   797         return $back;
   798     }
   799 
   800     /**
   801      * Get full_name for a lang or false.
   802      * @param string $language short langname (html4strict for example)
   803      * @return mixed
   804      */
   805     function get_language_fullname($language)
   806     {
   807         //Clean up the language name to prevent malicious code injection
   808         $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
   809 
   810         $language = strtolower($language);
   811 
   812         // get fullpath-filename for a langname
   813         $fullpath = $this->language_path.$language.'.php';
   814 
   815         // we need to get contents :S
   816         if (false === ($data = file_get_contents($fullpath))) {
   817             $this->error = sprintf('Geshi::get_lang_fullname() Unknown Language: %s', $language);
   818             return false;
   819         }
   820 
   821         // match the langname
   822         if (!preg_match('/\'LANG_NAME\'\s*=>\s*\'((?:[^\']|\\\')+?)\'/', $data, $matches)) {
   823             $this->error = sprintf('Geshi::get_lang_fullname(%s): Regex can not detect language', $language);
   824             return false;
   825         }
   826 
   827         // return fullname for langname
   828         return stripcslashes($matches[1]);
   829     }
   830 
   831     /**
   832      * Sets the type of header to be used.
   833      *
   834      * If GESHI_HEADER_DIV is used, the code is surrounded in a "div". This
   835      * means more source code but more control over tab width and line-wrapping.
   836      * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
   837      * control. Default is GESHI_HEADER_PRE.
   838      *
   839      * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
   840      * should be outputted.
   841      *
   842      * @param int The type of header to be used
   843      * @since 1.0.0
   844      */
   845     function set_header_type($type) {
   846         //Check if we got a valid header type
   847         if (!in_array($type, array(GESHI_HEADER_NONE, GESHI_HEADER_DIV,
   848             GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID, GESHI_HEADER_PRE_TABLE))) {
   849             $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
   850             return;
   851         }
   852 
   853         //Set that new header type
   854         $this->header_type = $type;
   855     }
   856 
   857     /**
   858      * Sets the styles for the code that will be outputted
   859      * when this object is parsed. The style should be a
   860      * string of valid stylesheet declarations
   861      *
   862      * @param string  The overall style for the outputted code block
   863      * @param boolean Whether to merge the styles with the current styles or not
   864      * @since 1.0.0
   865      */
   866     function set_overall_style($style, $preserve_defaults = false) {
   867         if (!$preserve_defaults) {
   868             $this->overall_style = $style;
   869         } else {
   870             $this->overall_style .= $style;
   871         }
   872     }
   873 
   874     /**
   875      * Sets the overall classname for this block of code. This
   876      * class can then be used in a stylesheet to style this object's
   877      * output
   878      *
   879      * @param string The class name to use for this block of code
   880      * @since 1.0.0
   881      */
   882     function set_overall_class($class) {
   883         $this->overall_class = $class;
   884     }
   885 
   886     /**
   887      * Sets the overall id for this block of code. This id can then
   888      * be used in a stylesheet to style this object's output
   889      *
   890      * @param string The ID to use for this block of code
   891      * @since 1.0.0
   892      */
   893     function set_overall_id($id) {
   894         $this->overall_id = $id;
   895     }
   896 
   897     /**
   898      * Sets whether CSS classes should be used to highlight the source. Default
   899      * is off, calling this method with no arguments will turn it on
   900      *
   901      * @param boolean Whether to turn classes on or not
   902      * @since 1.0.0
   903      */
   904     function enable_classes($flag = true) {
   905         $this->use_classes = ($flag) ? true : false;
   906     }
   907 
   908     /**
   909      * Sets the style for the actual code. This should be a string
   910      * containing valid stylesheet declarations. If $preserve_defaults is
   911      * true, then styles are merged with the default styles, with the
   912      * user defined styles having priority
   913      *
   914      * Note: Use this method to override any style changes you made to
   915      * the line numbers if you are using line numbers, else the line of
   916      * code will have the same style as the line number! Consult the
   917      * GeSHi documentation for more information about this.
   918      *
   919      * @param string  The style to use for actual code
   920      * @param boolean Whether to merge the current styles with the new styles
   921      * @since 1.0.2
   922      */
   923     function set_code_style($style, $preserve_defaults = false) {
   924         if (!$preserve_defaults) {
   925             $this->code_style = $style;
   926         } else {
   927             $this->code_style .= $style;
   928         }
   929     }
   930 
   931     /**
   932      * Sets the styles for the line numbers.
   933      *
   934      * @param string The style for the line numbers that are "normal"
   935      * @param string|boolean If a string, this is the style of the line
   936      *        numbers that are "fancy", otherwise if boolean then this
   937      *        defines whether the normal styles should be merged with the
   938      *        new normal styles or not
   939      * @param boolean If set, is the flag for whether to merge the "fancy"
   940      *        styles with the current styles or not
   941      * @since 1.0.2
   942      */
   943     function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
   944         //Check if we got 2 or three parameters
   945         if (is_bool($style2)) {
   946             $preserve_defaults = $style2;
   947             $style2 = '';
   948         }
   949 
   950         //Actually set the new styles
   951         if (!$preserve_defaults) {
   952             $this->line_style1 = $style1;
   953             $this->line_style2 = $style2;
   954         } else {
   955             $this->line_style1 .= $style1;
   956             $this->line_style2 .= $style2;
   957         }
   958     }
   959 
   960     /**
   961      * Sets whether line numbers should be displayed.
   962      *
   963      * Valid values for the first parameter are:
   964      *
   965      *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
   966      *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
   967      *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
   968      *
   969      * For fancy line numbers, the second parameter is used to signal which lines
   970      * are to be fancy. For example, if the value of this parameter is 5 then every
   971      * 5th line will be fancy.
   972      *
   973      * @param int How line numbers should be displayed
   974      * @param int Defines which lines are fancy
   975      * @since 1.0.0
   976      */
   977     function enable_line_numbers($flag, $nth_row = 5) {
   978         if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
   979             && GESHI_FANCY_LINE_NUMBERS != $flag) {
   980             $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
   981         }
   982         $this->line_numbers = $flag;
   983         $this->line_nth_row = $nth_row;
   984     }
   985 
   986     /**
   987      * Sets whether spans and other HTML markup generated by GeSHi can
   988      * span over multiple lines or not. Defaults to true to reduce overhead.
   989      * Set it to false if you want to manipulate the output or manually display
   990      * the code in an ordered list.
   991      *
   992      * @param boolean Whether multiline spans are allowed or not
   993      * @since 1.0.7.22
   994      */
   995     function enable_multiline_span($flag) {
   996         $this->allow_multiline_span = (bool) $flag;
   997     }
   998 
   999     /**
  1000      * Get current setting for multiline spans, see GeSHi->enable_multiline_span().
  1001      *
  1002      * @see enable_multiline_span
  1003      * @return bool
  1004      */
  1005     function get_multiline_span() {
  1006         return $this->allow_multiline_span;
  1007     }
  1008 
  1009     /**
  1010      * Sets the style for a keyword group. If $preserve_defaults is
  1011      * true, then styles are merged with the default styles, with the
  1012      * user defined styles having priority
  1013      *
  1014      * @param int     The key of the keyword group to change the styles of
  1015      * @param string  The style to make the keywords
  1016      * @param boolean Whether to merge the new styles with the old or just
  1017      *                to overwrite them
  1018      * @since 1.0.0
  1019      */
  1020     function set_keyword_group_style($key, $style, $preserve_defaults = false) {
  1021         //Set the style for this keyword group
  1022         if (!$preserve_defaults) {
  1023             $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
  1024         } else {
  1025             $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
  1026         }
  1027 
  1028         //Update the lexic permissions
  1029         if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
  1030             $this->lexic_permissions['KEYWORDS'][$key] = true;
  1031         }
  1032     }
  1033 
  1034     /**
  1035      * Turns highlighting on/off for a keyword group
  1036      *
  1037      * @param int     The key of the keyword group to turn on or off
  1038      * @param boolean Whether to turn highlighting for that group on or off
  1039      * @since 1.0.0
  1040      */
  1041     function set_keyword_group_highlighting($key, $flag = true) {
  1042         $this->lexic_permissions['KEYWORDS'][$key] = ($flag) ? true : false;
  1043     }
  1044 
  1045     /**
  1046      * Sets the styles for comment groups.  If $preserve_defaults is
  1047      * true, then styles are merged with the default styles, with the
  1048      * user defined styles having priority
  1049      *
  1050      * @param int     The key of the comment group to change the styles of
  1051      * @param string  The style to make the comments
  1052      * @param boolean Whether to merge the new styles with the old or just
  1053      *                to overwrite them
  1054      * @since 1.0.0
  1055      */
  1056     function set_comments_style($key, $style, $preserve_defaults = false) {
  1057         if (!$preserve_defaults) {
  1058             $this->language_data['STYLES']['COMMENTS'][$key] = $style;
  1059         } else {
  1060             $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
  1061         }
  1062     }
  1063 
  1064     /**
  1065      * Turns highlighting on/off for comment groups
  1066      *
  1067      * @param int     The key of the comment group to turn on or off
  1068      * @param boolean Whether to turn highlighting for that group on or off
  1069      * @since 1.0.0
  1070      */
  1071     function set_comments_highlighting($key, $flag = true) {
  1072         $this->lexic_permissions['COMMENTS'][$key] = ($flag) ? true : false;
  1073     }
  1074 
  1075     /**
  1076      * Sets the styles for escaped characters. If $preserve_defaults is
  1077      * true, then styles are merged with the default styles, with the
  1078      * user defined styles having priority
  1079      *
  1080      * @param string  The style to make the escape characters
  1081      * @param boolean Whether to merge the new styles with the old or just
  1082      *                to overwrite them
  1083      * @since 1.0.0
  1084      */
  1085     function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
  1086         if (!$preserve_defaults) {
  1087             $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
  1088         } else {
  1089             $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
  1090         }
  1091     }
  1092 
  1093     /**
  1094      * Turns highlighting on/off for escaped characters
  1095      *
  1096      * @param boolean Whether to turn highlighting for escape characters on or off
  1097      * @since 1.0.0
  1098      */
  1099     function set_escape_characters_highlighting($flag = true) {
  1100         $this->lexic_permissions['ESCAPE_CHAR'] = ($flag) ? true : false;
  1101     }
  1102 
  1103     /**
  1104      * Sets the styles for brackets. If $preserve_defaults is
  1105      * true, then styles are merged with the default styles, with the
  1106      * user defined styles having priority
  1107      *
  1108      * This method is DEPRECATED: use set_symbols_style instead.
  1109      * This method will be removed in 1.2.X
  1110      *
  1111      * @param string  The style to make the brackets
  1112      * @param boolean Whether to merge the new styles with the old or just
  1113      *                to overwrite them
  1114      * @since 1.0.0
  1115      * @deprecated In favour of set_symbols_style
  1116      */
  1117     function set_brackets_style($style, $preserve_defaults = false) {
  1118         if (!$preserve_defaults) {
  1119             $this->language_data['STYLES']['BRACKETS'][0] = $style;
  1120         } else {
  1121             $this->language_data['STYLES']['BRACKETS'][0] .= $style;
  1122         }
  1123     }
  1124 
  1125     /**
  1126      * Turns highlighting on/off for brackets
  1127      *
  1128      * This method is DEPRECATED: use set_symbols_highlighting instead.
  1129      * This method will be removed in 1.2.X
  1130      *
  1131      * @param boolean Whether to turn highlighting for brackets on or off
  1132      * @since 1.0.0
  1133      * @deprecated In favour of set_symbols_highlighting
  1134      */
  1135     function set_brackets_highlighting($flag) {
  1136         $this->lexic_permissions['BRACKETS'] = ($flag) ? true : false;
  1137     }
  1138 
  1139     /**
  1140      * Sets the styles for symbols. If $preserve_defaults is
  1141      * true, then styles are merged with the default styles, with the
  1142      * user defined styles having priority
  1143      *
  1144      * @param string  The style to make the symbols
  1145      * @param boolean Whether to merge the new styles with the old or just
  1146      *                to overwrite them
  1147      * @param int     Tells the group of symbols for which style should be set.
  1148      * @since 1.0.1
  1149      */
  1150     function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
  1151         // Update the style of symbols
  1152         if (!$preserve_defaults) {
  1153             $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
  1154         } else {
  1155             $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
  1156         }
  1157 
  1158         // For backward compatibility
  1159         if (0 == $group) {
  1160             $this->set_brackets_style ($style, $preserve_defaults);
  1161         }
  1162     }
  1163 
  1164     /**
  1165      * Turns highlighting on/off for symbols
  1166      *
  1167      * @param boolean Whether to turn highlighting for symbols on or off
  1168      * @since 1.0.0
  1169      */
  1170     function set_symbols_highlighting($flag) {
  1171         // Update lexic permissions for this symbol group
  1172         $this->lexic_permissions['SYMBOLS'] = ($flag) ? true : false;
  1173 
  1174         // For backward compatibility
  1175         $this->set_brackets_highlighting ($flag);
  1176     }
  1177 
  1178     /**
  1179      * Sets the styles for strings. If $preserve_defaults is
  1180      * true, then styles are merged with the default styles, with the
  1181      * user defined styles having priority
  1182      *
  1183      * @param string  The style to make the strings
  1184      * @param boolean Whether to merge the new styles with the old or just
  1185      *                to overwrite them
  1186      * @param int     Tells the group of strings for which style should be set.
  1187      * @since 1.0.0
  1188      */
  1189     function set_strings_style($style, $preserve_defaults = false, $group = 0) {
  1190         if (!$preserve_defaults) {
  1191             $this->language_data['STYLES']['STRINGS'][$group] = $style;
  1192         } else {
  1193             $this->language_data['STYLES']['STRINGS'][$group] .= $style;
  1194         }
  1195     }
  1196 
  1197     /**
  1198      * Turns highlighting on/off for strings
  1199      *
  1200      * @param boolean Whether to turn highlighting for strings on or off
  1201      * @since 1.0.0
  1202      */
  1203     function set_strings_highlighting($flag) {
  1204         $this->lexic_permissions['STRINGS'] = ($flag) ? true : false;
  1205     }
  1206 
  1207     /**
  1208      * Sets the styles for strict code blocks. If $preserve_defaults is
  1209      * true, then styles are merged with the default styles, with the
  1210      * user defined styles having priority
  1211      *
  1212      * @param string  The style to make the script blocks
  1213      * @param boolean Whether to merge the new styles with the old or just
  1214      *                to overwrite them
  1215      * @param int     Tells the group of script blocks for which style should be set.
  1216      * @since 1.0.8.4
  1217      */
  1218     function set_script_style($style, $preserve_defaults = false, $group = 0) {
  1219         // Update the style of symbols
  1220         if (!$preserve_defaults) {
  1221             $this->language_data['STYLES']['SCRIPT'][$group] = $style;
  1222         } else {
  1223             $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
  1224         }
  1225     }
  1226 
  1227     /**
  1228      * Sets the styles for numbers. If $preserve_defaults is
  1229      * true, then styles are merged with the default styles, with the
  1230      * user defined styles having priority
  1231      *
  1232      * @param string  The style to make the numbers
  1233      * @param boolean Whether to merge the new styles with the old or just
  1234      *                to overwrite them
  1235      * @param int     Tells the group of numbers for which style should be set.
  1236      * @since 1.0.0
  1237      */
  1238     function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
  1239         if (!$preserve_defaults) {
  1240             $this->language_data['STYLES']['NUMBERS'][$group] = $style;
  1241         } else {
  1242             $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
  1243         }
  1244     }
  1245 
  1246     /**
  1247      * Turns highlighting on/off for numbers
  1248      *
  1249      * @param boolean Whether to turn highlighting for numbers on or off
  1250      * @since 1.0.0
  1251      */
  1252     function set_numbers_highlighting($flag) {
  1253         $this->lexic_permissions['NUMBERS'] = ($flag) ? true : false;
  1254     }
  1255 
  1256     /**
  1257      * Sets the styles for methods. $key is a number that references the
  1258      * appropriate "object splitter" - see the language file for the language
  1259      * you are highlighting to get this number. If $preserve_defaults is
  1260      * true, then styles are merged with the default styles, with the
  1261      * user defined styles having priority
  1262      *
  1263      * @param int     The key of the object splitter to change the styles of
  1264      * @param string  The style to make the methods
  1265      * @param boolean Whether to merge the new styles with the old or just
  1266      *                to overwrite them
  1267      * @since 1.0.0
  1268      */
  1269     function set_methods_style($key, $style, $preserve_defaults = false) {
  1270         if (!$preserve_defaults) {
  1271             $this->language_data['STYLES']['METHODS'][$key] = $style;
  1272         } else {
  1273             $this->language_data['STYLES']['METHODS'][$key] .= $style;
  1274         }
  1275     }
  1276 
  1277     /**
  1278      * Turns highlighting on/off for methods
  1279      *
  1280      * @param boolean Whether to turn highlighting for methods on or off
  1281      * @since 1.0.0
  1282      */
  1283     function set_methods_highlighting($flag) {
  1284         $this->lexic_permissions['METHODS'] = ($flag) ? true : false;
  1285     }
  1286 
  1287     /**
  1288      * Sets the styles for regexps. If $preserve_defaults is
  1289      * true, then styles are merged with the default styles, with the
  1290      * user defined styles having priority
  1291      *
  1292      * @param string  The style to make the regular expression matches
  1293      * @param boolean Whether to merge the new styles with the old or just
  1294      *                to overwrite them
  1295      * @since 1.0.0
  1296      */
  1297     function set_regexps_style($key, $style, $preserve_defaults = false) {
  1298         if (!$preserve_defaults) {
  1299             $this->language_data['STYLES']['REGEXPS'][$key] = $style;
  1300         } else {
  1301             $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
  1302         }
  1303     }
  1304 
  1305     /**
  1306      * Turns highlighting on/off for regexps
  1307      *
  1308      * @param int     The key of the regular expression group to turn on or off
  1309      * @param boolean Whether to turn highlighting for the regular expression group on or off
  1310      * @since 1.0.0
  1311      */
  1312     function set_regexps_highlighting($key, $flag) {
  1313         $this->lexic_permissions['REGEXPS'][$key] = ($flag) ? true : false;
  1314     }
  1315 
  1316     /**
  1317      * Sets whether a set of keywords are checked for in a case sensitive manner
  1318      *
  1319      * @param int The key of the keyword group to change the case sensitivity of
  1320      * @param boolean Whether to check in a case sensitive manner or not
  1321      * @since 1.0.0
  1322      */
  1323     function set_case_sensitivity($key, $case) {
  1324         $this->language_data['CASE_SENSITIVE'][$key] = ($case) ? true : false;
  1325     }
  1326 
  1327     /**
  1328      * Sets the case that keywords should use when found. Use the constants:
  1329      *
  1330      *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
  1331      *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
  1332      *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
  1333      *
  1334      * @param int A constant specifying what to do with matched keywords
  1335      * @since 1.0.1
  1336      */
  1337     function set_case_keywords($case) {
  1338         if (in_array($case, array(
  1339             GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER))) {
  1340             $this->language_data['CASE_KEYWORDS'] = $case;
  1341         }
  1342     }
  1343 
  1344     /**
  1345      * Sets how many spaces a tab is substituted for
  1346      *
  1347      * Widths below one are replaced by the default width of 8
  1348      *
  1349      * @param int The tab width
  1350      * @since 1.0.0
  1351      */
  1352     function set_tab_width($width) {
  1353         $this->tab_width = intval($width);
  1354 
  1355         //Check if it fit's the constraints:
  1356         if ($this->tab_width < 1) {
  1357             //Return it to the default
  1358             $this->tab_width = 8;
  1359         }
  1360     }
  1361 
  1362     /**
  1363      * Sets whether or not to use tab-stop width specified by language
  1364      *
  1365      * @param boolean Whether to use language-specific tab-stop widths
  1366      * @since 1.0.7.20
  1367      */
  1368     function set_use_language_tab_width($use) {
  1369         $this->use_language_tab_width = (bool) $use;
  1370     }
  1371 
  1372     /**
  1373      * Returns the tab width to use, based on the current language and user
  1374      * preference
  1375      *
  1376      * @return int Tab width
  1377      * @since 1.0.7.20
  1378      */
  1379     function get_real_tab_width() {
  1380         if (!$this->use_language_tab_width ||
  1381             !isset($this->language_data['TAB_WIDTH'])) {
  1382             return $this->tab_width;
  1383         } else {
  1384             return $this->language_data['TAB_WIDTH'];
  1385         }
  1386     }
  1387 
  1388     /**
  1389      * Enables/disables strict highlighting. Default is off, calling this
  1390      * method without parameters will turn it on. See documentation
  1391      * for more details on strict mode and where to use it.
  1392      *
  1393      * @param boolean Whether to enable strict mode or not
  1394      * @since 1.0.0
  1395      */
  1396     function enable_strict_mode($mode = true) {
  1397         if (GESHI_MAYBE == $this->language_data['STRICT_MODE_APPLIES']) {
  1398             $this->strict_mode = ($mode) ? GESHI_ALWAYS : GESHI_NEVER;
  1399         }
  1400     }
  1401 
  1402     /**
  1403      * Disables all highlighting
  1404      *
  1405      * @since 1.0.0
  1406      * @todo  Rewrite with array traversal
  1407      * @deprecated In favour of enable_highlighting
  1408      */
  1409     function disable_highlighting() {
  1410         $this->enable_highlighting(false);
  1411     }
  1412 
  1413     /**
  1414      * Enables all highlighting
  1415      *
  1416      * The optional flag parameter was added in version 1.0.7.21 and can be used
  1417      * to enable (true) or disable (false) all highlighting.
  1418      *
  1419      * @since 1.0.0
  1420      * @param boolean A flag specifying whether to enable or disable all highlighting
  1421      * @todo  Rewrite with array traversal
  1422      */
  1423     function enable_highlighting($flag = true) {
  1424         $flag = $flag ? true : false;
  1425         foreach ($this->lexic_permissions as $key => $value) {
  1426             if (is_array($value)) {
  1427                 foreach ($value as $k => $v) {
  1428                     $this->lexic_permissions[$key][$k] = $flag;
  1429                 }
  1430             } else {
  1431                 $this->lexic_permissions[$key] = $flag;
  1432             }
  1433         }
  1434 
  1435         // Context blocks
  1436         $this->enable_important_blocks = $flag;
  1437     }
  1438 
  1439     /**
  1440      * Given a file extension, this method returns either a valid geshi language
  1441      * name, or 'text' if no language matches the extension
  1442      *
  1443      * @param string The extension to get a language name for
  1444      * @param array  A lookup array to use instead of the default one
  1445      * @since 1.0.5
  1446      * @todo Re-think about how this method works (maybe make it private and/or make it
  1447      *       a extension->lang lookup?)
  1448      * @todo static?
  1449      */
  1450     function get_language_name_from_extension( $extension, $lookup = array() ) {
  1451         $extension = strtolower($extension);
  1452 
  1453         if ( !is_array($lookup) || empty($lookup)) {
  1454             $lookup = array(
  1455                 '6502acme' => array( 'a', 's', 'asm', 'inc' ),
  1456                 '6502tasm' => array( 'a', 's', 'asm', 'inc' ),
  1457                 '6502kickass' => array( 'a', 's', 'asm', 'inc' ),
  1458                 '68000devpac' => array( 'a', 's', 'asm', 'inc' ),
  1459                 'abap' => array('abap'),
  1460                 'actionscript' => array('as'),
  1461                 'ada' => array('a', 'ada', 'adb', 'ads'),
  1462                 'apache' => array('conf'),
  1463                 'asm' => array('ash', 'asm', 'inc'),
  1464                 'asp' => array('asp'),
  1465                 'bash' => array('sh'),
  1466                 'bf' => array('bf'),
  1467                 'c' => array('c', 'h'),
  1468                 'c_mac' => array('c', 'h'),
  1469                 'caddcl' => array(),
  1470                 'cadlisp' => array(),
  1471                 'cdfg' => array('cdfg'),
  1472                 'cobol' => array('cbl'),
  1473                 'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
  1474                 'csharp' => array('cs'),
  1475                 'css' => array('css'),
  1476                 'd' => array('d'),
  1477                 'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
  1478                 'diff' => array('diff', 'patch'),
  1479                 'dos' => array('bat', 'cmd'),
  1480                 'gdb' => array('kcrash', 'crash', 'bt'),
  1481                 'gettext' => array('po', 'pot'),
  1482                 'gml' => array('gml'),
  1483                 'gnuplot' => array('plt'),
  1484                 'groovy' => array('groovy'),
  1485                 'haskell' => array('hs'),
  1486                 'haxe' => array('hx'),
  1487                 'html4strict' => array('html', 'htm'),
  1488                 'ini' => array('ini', 'desktop'),
  1489                 'java' => array('java'),
  1490                 'javascript' => array('js'),
  1491                 'klonec' => array('kl1'),
  1492                 'klonecpp' => array('klx'),
  1493                 'latex' => array('tex'),
  1494                 'lisp' => array('lisp'),
  1495                 'lua' => array('lua'),
  1496                 'matlab' => array('m'),
  1497                 'mpasm' => array(),
  1498                 'mysql' => array('sql'),
  1499                 'nsis' => array(),
  1500                 'objc' => array(),
  1501                 'oobas' => array(),
  1502                 'oracle8' => array(),
  1503                 'oracle10' => array(),
  1504                 'pascal' => array('pas'),
  1505                 'perl' => array('pl', 'pm'),
  1506                 'php' => array('php', 'php5', 'phtml', 'phps'),
  1507                 'povray' => array('pov'),
  1508                 'providex' => array('pvc', 'pvx'),
  1509                 'prolog' => array('pl'),
  1510                 'python' => array('py'),
  1511                 'qbasic' => array('bi'),
  1512                 'reg' => array('reg'),
  1513                 'ruby' => array('rb'),
  1514                 'sas' => array('sas'),
  1515                 'scala' => array('scala'),
  1516                 'scheme' => array('scm'),
  1517                 'scilab' => array('sci'),
  1518                 'smalltalk' => array('st'),
  1519                 'smarty' => array(),
  1520                 'tcl' => array('tcl'),
  1521                 'text' => array('txt'),
  1522                 'vb' => array('bas'),
  1523                 'vbnet' => array(),
  1524                 'visualfoxpro' => array(),
  1525                 'whitespace' => array('ws'),
  1526                 'xml' => array('xml', 'svg', 'xrc'),
  1527                 'z80' => array('z80', 'asm', 'inc')
  1528             );
  1529         }
  1530 
  1531         foreach ($lookup as $lang => $extensions) {
  1532             if (in_array($extension, $extensions)) {
  1533                 return $lang;
  1534             }
  1535         }
  1536 
  1537         return 'text';
  1538     }
  1539 
  1540     /**
  1541      * Given a file name, this method loads its contents in, and attempts
  1542      * to set the language automatically. An optional lookup table can be
  1543      * passed for looking up the language name. If not specified a default
  1544      * table is used
  1545      *
  1546      * The language table is in the form
  1547      * <pre>array(
  1548      *   'lang_name' => array('extension', 'extension', ...),
  1549      *   'lang_name' ...
  1550      * );</pre>
  1551      *
  1552      * @param string The filename to load the source from
  1553      * @param array  A lookup array to use instead of the default one
  1554      * @todo Complete rethink of this and above method
  1555      * @since 1.0.5
  1556      */
  1557     function load_from_file($file_name, $lookup = array()) {
  1558         if (is_readable($file_name)) {
  1559             $this->set_source(file_get_contents($file_name));
  1560             $this->set_language($this->get_language_name_from_extension(substr(strrchr($file_name, '.'), 1), $lookup));
  1561         } else {
  1562             $this->error = GESHI_ERROR_FILE_NOT_READABLE;
  1563         }
  1564     }
  1565 
  1566     /**
  1567      * Adds a keyword to a keyword group for highlighting
  1568      *
  1569      * @param int    The key of the keyword group to add the keyword to
  1570      * @param string The word to add to the keyword group
  1571      * @since 1.0.0
  1572      */
  1573     function add_keyword($key, $word) {
  1574         if (!is_array($this->language_data['KEYWORDS'][$key])) {
  1575             $this->language_data['KEYWORDS'][$key] = array();
  1576         }
  1577         if (!in_array($word, $this->language_data['KEYWORDS'][$key])) {
  1578             $this->language_data['KEYWORDS'][$key][] = $word;
  1579 
  1580             //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
  1581             if ($this->parse_cache_built) {
  1582                 $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
  1583                 $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
  1584             }
  1585         }
  1586     }
  1587 
  1588     /**
  1589      * Removes a keyword from a keyword group
  1590      *
  1591      * @param int    The key of the keyword group to remove the keyword from
  1592      * @param string The word to remove from the keyword group
  1593      * @param bool   Wether to automatically recompile the optimized regexp list or not.
  1594      *               Note: if you set this to false and @see GeSHi->parse_code() was already called once,
  1595      *               for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
  1596      *               or the removed keyword will stay in cache and still be highlighted! On the other hand
  1597      *               it might be too expensive to recompile the regexp list for every removal if you want to
  1598      *               remove a lot of keywords.
  1599      * @since 1.0.0
  1600      */
  1601     function remove_keyword($key, $word, $recompile = true) {
  1602         $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key]);
  1603         if ($key_to_remove !== false) {
  1604             unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);
  1605 
  1606             //NEW in 1.0.8, optionally recompile keyword group
  1607             if ($recompile && $this->parse_cache_built) {
  1608                 $this->optimize_keyword_group($key);
  1609             }
  1610         }
  1611     }
  1612 
  1613     /**
  1614      * Creates a new keyword group
  1615      *
  1616      * @param int    The key of the keyword group to create
  1617      * @param string The styles for the keyword group
  1618      * @param boolean Whether the keyword group is case sensitive ornot
  1619      * @param array  The words to use for the keyword group
  1620      * @since 1.0.0
  1621      */
  1622     function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
  1623         $words = (array) $words;
  1624         if  (empty($words)) {
  1625             // empty word lists mess up highlighting
  1626             return false;
  1627         }
  1628 
  1629         //Add the new keyword group internally
  1630         $this->language_data['KEYWORDS'][$key] = $words;
  1631         $this->lexic_permissions['KEYWORDS'][$key] = true;
  1632         $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
  1633         $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;
  1634 
  1635         //NEW in 1.0.8, cache keyword regexp
  1636         if ($this->parse_cache_built) {
  1637             $this->optimize_keyword_group($key);
  1638         }
  1639     }
  1640 
  1641     /**
  1642      * Removes a keyword group
  1643      *
  1644      * @param int    The key of the keyword group to remove
  1645      * @since 1.0.0
  1646      */
  1647     function remove_keyword_group ($key) {
  1648         //Remove the keyword group internally
  1649         unset($this->language_data['KEYWORDS'][$key]);
  1650         unset($this->lexic_permissions['KEYWORDS'][$key]);
  1651         unset($this->language_data['CASE_SENSITIVE'][$key]);
  1652         unset($this->language_data['STYLES']['KEYWORDS'][$key]);
  1653 
  1654         //NEW in 1.0.8
  1655         unset($this->language_data['CACHED_KEYWORD_LISTS'][$key]);
  1656     }
  1657 
  1658     /**
  1659      * compile optimized regexp list for keyword group
  1660      *
  1661      * @param int   The key of the keyword group to compile & optimize
  1662      * @since 1.0.8
  1663      */
  1664     function optimize_keyword_group($key) {
  1665         $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
  1666             $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);
  1667         $space_as_whitespace = false;
  1668         if(isset($this->language_data['PARSER_CONTROL'])) {
  1669             if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
  1670                 if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
  1671                     $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
  1672                 }
  1673                 if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
  1674                     if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
  1675                         $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
  1676                     }
  1677                 }
  1678             }
  1679         }
  1680         if($space_as_whitespace) {
  1681             foreach($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $rxk => $rxv) {
  1682                 $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk] =
  1683                     str_replace(" ", "\\s+", $rxv);
  1684             }
  1685         }
  1686     }
  1687 
  1688     /**
  1689      * Sets the content of the header block
  1690      *
  1691      * @param string The content of the header block
  1692      * @since 1.0.2
  1693      */
  1694     function set_header_content($content) {
  1695         $this->header_content = $content;
  1696     }
  1697 
  1698     /**
  1699      * Sets the content of the footer block
  1700      *
  1701      * @param string The content of the footer block
  1702      * @since 1.0.2
  1703      */
  1704     function set_footer_content($content) {
  1705         $this->footer_content = $content;
  1706     }
  1707 
  1708     /**
  1709      * Sets the style for the header content
  1710      *
  1711      * @param string The style for the header content
  1712      * @since 1.0.2
  1713      */
  1714     function set_header_content_style($style) {
  1715         $this->header_content_style = $style;
  1716     }
  1717 
  1718     /**
  1719      * Sets the style for the footer content
  1720      *
  1721      * @param string The style for the footer content
  1722      * @since 1.0.2
  1723      */
  1724     function set_footer_content_style($style) {
  1725         $this->footer_content_style = $style;
  1726     }
  1727 
  1728     /**
  1729      * Sets whether to force a surrounding block around
  1730      * the highlighted code or not
  1731      *
  1732      * @param boolean Tells whether to enable or disable this feature
  1733      * @since 1.0.7.20
  1734      */
  1735     function enable_inner_code_block($flag) {
  1736         $this->force_code_block = (bool)$flag;
  1737     }
  1738 
  1739     /**
  1740      * Sets the base URL to be used for keywords
  1741      *
  1742      * @param int The key of the keyword group to set the URL for
  1743      * @param string The URL to set for the group. If {FNAME} is in
  1744      *               the url somewhere, it is replaced by the keyword
  1745      *               that the URL is being made for
  1746      * @since 1.0.2
  1747      */
  1748     function set_url_for_keyword_group($group, $url) {
  1749         $this->language_data['URLS'][$group] = $url;
  1750     }
  1751 
  1752     /**
  1753      * Sets styles for links in code
  1754      *
  1755      * @param int A constant that specifies what state the style is being
  1756      *            set for - e.g. :hover or :visited
  1757      * @param string The styles to use for that state
  1758      * @since 1.0.2
  1759      */
  1760     function set_link_styles($type, $styles) {
  1761         $this->link_styles[$type] = $styles;
  1762     }
  1763 
  1764     /**
  1765      * Sets the target for links in code
  1766      *
  1767      * @param string The target for links in the code, e.g. _blank
  1768      * @since 1.0.3
  1769      */
  1770     function set_link_target($target) {
  1771         if (!$target) {
  1772             $this->link_target = '';
  1773         } else {
  1774             $this->link_target = ' target="' . $target . '"';
  1775         }
  1776     }
  1777 
  1778     /**
  1779      * Sets styles for important parts of the code
  1780      *
  1781      * @param string The styles to use on important parts of the code
  1782      * @since 1.0.2
  1783      */
  1784     function set_important_styles($styles) {
  1785         $this->important_styles = $styles;
  1786     }
  1787 
  1788     /**
  1789      * Sets whether context-important blocks are highlighted
  1790      *
  1791      * @param boolean Tells whether to enable or disable highlighting of important blocks
  1792      * @todo REMOVE THIS SHIZ FROM GESHI!
  1793      * @deprecated
  1794      * @since 1.0.2
  1795      */
  1796     function enable_important_blocks($flag) {
  1797         $this->enable_important_blocks = ( $flag ) ? true : false;
  1798     }
  1799 
  1800     /**
  1801      * Whether CSS IDs should be added to each line
  1802      *
  1803      * @param boolean If true, IDs will be added to each line.
  1804      * @since 1.0.2
  1805      */
  1806     function enable_ids($flag = true) {
  1807         $this->add_ids = ($flag) ? true : false;
  1808     }
  1809 
  1810     /**
  1811      * Specifies which lines to highlight extra
  1812      *
  1813      * The extra style parameter was added in 1.0.7.21.
  1814      *
  1815      * @param mixed An array of line numbers to highlight, or just a line
  1816      *              number on its own.
  1817      * @param string A string specifying the style to use for this line.
  1818      *              If null is specified, the default style is used.
  1819      *              If false is specified, the line will be removed from
  1820      *              special highlighting
  1821      * @since 1.0.2
  1822      * @todo  Some data replication here that could be cut down on
  1823      */
  1824     function highlight_lines_extra($lines, $style = null) {
  1825         if (is_array($lines)) {
  1826             //Split up the job using single lines at a time
  1827             foreach ($lines as $line) {
  1828                 $this->highlight_lines_extra($line, $style);
  1829             }
  1830         } else {
  1831             //Mark the line as being highlighted specially
  1832             $lines = intval($lines);
  1833             $this->highlight_extra_lines[$lines] = $lines;
  1834 
  1835             //Decide on which style to use
  1836             if ($style === null) { //Check if we should use default style
  1837                 unset($this->highlight_extra_lines_styles[$lines]);
  1838             } elseif ($style === false) { //Check if to remove this line
  1839                 unset($this->highlight_extra_lines[$lines]);
  1840                 unset($this->highlight_extra_lines_styles[$lines]);
  1841             } else {
  1842                 $this->highlight_extra_lines_styles[$lines] = $style;
  1843             }
  1844         }
  1845     }
  1846 
  1847     /**
  1848      * Sets the style for extra-highlighted lines
  1849      *
  1850      * @param string The style for extra-highlighted lines
  1851      * @since 1.0.2
  1852      */
  1853     function set_highlight_lines_extra_style($styles) {
  1854         $this->highlight_extra_lines_style = $styles;
  1855     }
  1856 
  1857     /**
  1858      * Sets the line-ending
  1859      *
  1860      * @param string The new line-ending
  1861      * @since 1.0.2
  1862      */
  1863     function set_line_ending($line_ending) {
  1864         $this->line_ending = (string)$line_ending;
  1865     }
  1866 
  1867     /**
  1868      * Sets what number line numbers should start at. Should
  1869      * be a positive integer, and will be converted to one.
  1870      *
  1871      * <b>Warning:</b> Using this method will add the "start"
  1872      * attribute to the &lt;ol&gt; that is used for line numbering.
  1873      * This is <b>not</b> valid XHTML strict, so if that's what you
  1874      * care about then don't use this method. Firefox is getting
  1875      * support for the CSS method of doing this in 1.1 and Opera
  1876      * has support for the CSS method, but (of course) IE doesn't
  1877      * so it's not worth doing it the CSS way yet.
  1878      *
  1879      * @param int The number to start line numbers at
  1880      * @since 1.0.2
  1881      */
  1882     function start_line_numbers_at($number) {
  1883         $this->line_numbers_start = abs(intval($number));
  1884     }
  1885 
  1886     /**
  1887      * Sets the encoding used for htmlspecialchars(), for international
  1888      * support.
  1889      *
  1890      * NOTE: This is not needed for now because htmlspecialchars() is not
  1891      * being used (it has a security hole in PHP4 that has not been patched).
  1892      * Maybe in a future version it may make a return for speed reasons, but
  1893      * I doubt it.
  1894      *
  1895      * @param string The encoding to use for the source
  1896      * @since 1.0.3
  1897      */
  1898     function set_encoding($encoding) {
  1899         if ($encoding) {
  1900           $this->encoding = strtolower($encoding);
  1901         }
  1902     }
  1903 
  1904     /**
  1905      * Turns linking of keywords on or off.
  1906      *
  1907      * @param boolean If true, links will be added to keywords
  1908      * @since 1.0.2
  1909      */
  1910     function enable_keyword_links($enable = true) {
  1911         $this->keyword_links = (bool) $enable;
  1912     }
  1913 
  1914     /**
  1915      * Setup caches needed for styling. This is automatically called in
  1916      * parse_code() and get_stylesheet() when appropriate. This function helps
  1917      * stylesheet generators as they rely on some style information being
  1918      * preprocessed
  1919      *
  1920      * @since 1.0.8
  1921      * @access private
  1922      */
  1923     function build_style_cache() {
  1924         //Build the style cache needed to highlight numbers appropriate
  1925         if($this->lexic_permissions['NUMBERS']) {
  1926             //First check what way highlighting information for numbers are given
  1927             if(!isset($this->language_data['NUMBERS'])) {
  1928                 $this->language_data['NUMBERS'] = 0;
  1929             }
  1930 
  1931             if(is_array($this->language_data['NUMBERS'])) {
  1932                 $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
  1933             } else {
  1934                 $this->language_data['NUMBERS_CACHE'] = array();
  1935                 if(!$this->language_data['NUMBERS']) {
  1936                     $this->language_data['NUMBERS'] =
  1937                         GESHI_NUMBER_INT_BASIC |
  1938                         GESHI_NUMBER_FLT_NONSCI;
  1939                 }
  1940 
  1941                 for($i = 0, $j = $this->language_data['NUMBERS']; $j > 0; ++$i, $j>>=1) {
  1942                     //Rearrange style indices if required ...
  1943                     if(isset($this->language_data['STYLES']['NUMBERS'][1<<$i])) {
  1944                         $this->language_data['STYLES']['NUMBERS'][$i] =
  1945                             $this->language_data['STYLES']['NUMBERS'][1<<$i];
  1946                         unset($this->language_data['STYLES']['NUMBERS'][1<<$i]);
  1947                     }
  1948 
  1949                     //Check if this bit is set for highlighting
  1950                     if($j&1) {
  1951                         //So this bit is set ...
  1952                         //Check if it belongs to group 0 or the actual stylegroup
  1953                         if(isset($this->language_data['STYLES']['NUMBERS'][$i])) {
  1954                             $this->language_data['NUMBERS_CACHE'][$i] = 1 << $i;
  1955                         } else {
  1956                             if(!isset($this->language_data['NUMBERS_CACHE'][0])) {
  1957                                 $this->language_data['NUMBERS_CACHE'][0] = 0;
  1958                             }
  1959                             $this->language_data['NUMBERS_CACHE'][0] |= 1 << $i;
  1960                         }
  1961                     }
  1962                 }
  1963             }
  1964         }
  1965     }
  1966 
  1967     /**
  1968      * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
  1969      * This function makes stylesheet generators much faster as they do not need these caches.
  1970      *
  1971      * @since 1.0.8
  1972      * @access private
  1973      */
  1974     function build_parse_cache() {
  1975         // cache symbol regexp
  1976         //As this is a costy operation, we avoid doing it for multiple groups ...
  1977         //Instead we perform it for all symbols at once.
  1978         //
  1979         //For this to work, we need to reorganize the data arrays.
  1980         if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
  1981             $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;
  1982 
  1983             $this->language_data['SYMBOL_DATA'] = array();
  1984             $symbol_preg_multi = array(); // multi char symbols
  1985             $symbol_preg_single = array(); // single char symbols
  1986             foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
  1987                 if (is_array($symbols)) {
  1988                     foreach ($symbols as $sym) {
  1989                         $sym = $this->hsc($sym);
  1990                         if (!isset($this->language_data['SYMBOL_DATA'][$sym])) {
  1991                             $this->language_data['SYMBOL_DATA'][$sym] = $key;
  1992                             if (isset($sym[1])) { // multiple chars
  1993                                 $symbol_preg_multi[] = preg_quote($sym, '/');
  1994                             } else { // single char
  1995                                 if ($sym == '-') {
  1996                                     // don't trigger range out of order error
  1997                                     $symbol_preg_single[] = '\-';
  1998                                 } else {
  1999                                     $symbol_preg_single[] = preg_quote($sym, '/');
  2000                                 }
  2001                             }
  2002                         }
  2003                     }
  2004                 } else {
  2005                     $symbols = $this->hsc($symbols);
  2006                     if (!isset($this->language_data['SYMBOL_DATA'][$symbols])) {
  2007                         $this->language_data['SYMBOL_DATA'][$symbols] = 0;
  2008                         if (isset($symbols[1])) { // multiple chars
  2009                             $symbol_preg_multi[] = preg_quote($symbols, '/');
  2010                         } elseif ($symbols == '-') {
  2011                             // don't trigger range out of order error
  2012                             $symbol_preg_single[] = '\-';
  2013                         } else { // single char
  2014                             $symbol_preg_single[] = preg_quote($symbols, '/');
  2015                         }
  2016                     }
  2017                 }
  2018             }
  2019 
  2020             //Now we have an array with each possible symbol as the key and the style as the actual data.
  2021             //This way we can set the correct style just the moment we highlight ...
  2022             //
  2023             //Now we need to rewrite our array to get a search string that
  2024             $symbol_preg = array();
  2025             if (!empty($symbol_preg_multi)) {
  2026                 rsort($symbol_preg_multi);
  2027                 $symbol_preg[] = implode('|', $symbol_preg_multi);
  2028             }
  2029             if (!empty($symbol_preg_single)) {
  2030                 rsort($symbol_preg_single);
  2031                 $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
  2032             }
  2033             $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
  2034         }
  2035 
  2036         // cache optimized regexp for keyword matching
  2037         // remove old cache
  2038         $this->language_data['CACHED_KEYWORD_LISTS'] = array();
  2039         foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
  2040             if (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
  2041                     $this->lexic_permissions['KEYWORDS'][$key]) {
  2042                 $this->optimize_keyword_group($key);
  2043             }
  2044         }
  2045 
  2046         // brackets
  2047         if ($this->lexic_permissions['BRACKETS']) {
  2048             $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');
  2049             if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
  2050                 $this->language_data['CACHE_BRACKET_REPLACE'] = array(
  2051                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#91;|>',
  2052                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#93;|>',
  2053                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#40;|>',
  2054                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#41;|>',
  2055                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#123;|>',
  2056                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#125;|>',
  2057                 );
  2058             }
  2059             else {
  2060                 $this->language_data['CACHE_BRACKET_REPLACE'] = array(
  2061                     '<| class="br0">&#91;|>',
  2062                     '<| class="br0">&#93;|>',
  2063                     '<| class="br0">&#40;|>',
  2064                     '<| class="br0">&#41;|>',
  2065                     '<| class="br0">&#123;|>',
  2066                     '<| class="br0">&#125;|>',
  2067                 );
  2068             }
  2069         }
  2070 
  2071         //Build the parse cache needed to highlight numbers appropriate
  2072         if($this->lexic_permissions['NUMBERS']) {
  2073             //Check if the style rearrangements have been processed ...
  2074             //This also does some preprocessing to check which style groups are useable ...
  2075             if(!isset($this->language_data['NUMBERS_CACHE'])) {
  2076                 $this->build_style_cache();
  2077             }
  2078 
  2079             //Number format specification
  2080             //All this formats are matched case-insensitively!
  2081             static $numbers_format = array(
  2082                 GESHI_NUMBER_INT_BASIC =>
  2083                     '(?:(?<![0-9a-z_\.%$@])|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
  2084                 GESHI_NUMBER_INT_CSTYLE =>
  2085                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
  2086                 GESHI_NUMBER_BIN_SUFFIX =>
  2087                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
  2088                 GESHI_NUMBER_BIN_PREFIX_PERCENT =>
  2089                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
  2090                 GESHI_NUMBER_BIN_PREFIX_0B =>
  2091                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
  2092                 GESHI_NUMBER_OCT_PREFIX =>
  2093                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
  2094                 GESHI_NUMBER_OCT_PREFIX_0O =>
  2095                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
  2096                 GESHI_NUMBER_OCT_PREFIX_AT =>
  2097                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])\@[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
  2098                 GESHI_NUMBER_OCT_SUFFIX =>
  2099                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
  2100                 GESHI_NUMBER_HEX_PREFIX =>
  2101                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
  2102                 GESHI_NUMBER_HEX_PREFIX_DOLLAR =>
  2103                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\$[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
  2104                 GESHI_NUMBER_HEX_SUFFIX =>
  2105                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
  2106                 GESHI_NUMBER_FLT_NONSCI =>
  2107                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
  2108                 GESHI_NUMBER_FLT_NONSCI_F =>
  2109                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
  2110                 GESHI_NUMBER_FLT_SCI_SHORT =>
  2111                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
  2112                 GESHI_NUMBER_FLT_SCI_ZERO =>
  2113                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
  2114                 );
  2115 
  2116             //At this step we have an associative array with flag groups for a
  2117             //specific style or an string denoting a regexp given its index.
  2118             $this->language_data['NUMBERS_RXCACHE'] = array();
  2119             foreach($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
  2120                 if(is_string($rxdata)) {
  2121                     $regexp = $rxdata;
  2122                 } else {
  2123                     //This is a bitfield of number flags to highlight:
  2124                     //Build an array, implode them together and make this the actual RX
  2125                     $rxuse = array();
  2126                     for($i = 1; $i <= $rxdata; $i<<=1) {
  2127                         if($rxdata & $i) {
  2128                             $rxuse[] = $numbers_format[$i];
  2129                         }
  2130                     }
  2131                     $regexp = implode("|", $rxuse);
  2132                 }
  2133 
  2134                 $this->language_data['NUMBERS_RXCACHE'][$key] =
  2135                     "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|(?>[^\<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i"; //
  2136             }
  2137 
  2138             if(!isset($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'])) {
  2139                 $this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'] = '#\d#';
  2140             }
  2141         }
  2142 
  2143         $this->parse_cache_built = true;
  2144     }
  2145 
  2146     /**
  2147      * Returns the code in $this->source, highlighted and surrounded by the
  2148      * nessecary HTML.
  2149      *
  2150      * This should only be called ONCE, cos it's SLOW! If you want to highlight
  2151      * the same source multiple times, you're better off doing a whole lot of
  2152      * str_replaces to replace the &lt;span&gt;s
  2153      *
  2154      * @since 1.0.0
  2155      */
  2156     function parse_code () {
  2157         // Start the timer
  2158         $start_time = microtime();
  2159 
  2160         // Replace all newlines to a common form.
  2161         $code = str_replace("\r\n", "\n", $this->source);
  2162         $code = str_replace("\r", "\n", $code);
  2163 
  2164         // Firstly, if there is an error, we won't highlight
  2165         if ($this->error) {
  2166             //Escape the source for output
  2167             $result = $this->hsc($this->source);
  2168 
  2169             //This fix is related to SF#1923020, but has to be applied regardless of
  2170             //actually highlighting symbols.
  2171             $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
  2172 
  2173             // Timing is irrelevant
  2174             $this->set_time($start_time, $start_time);
  2175             $this->finalise($result);
  2176             return $result;
  2177         }
  2178 
  2179         // make sure the parse cache is up2date
  2180         if (!$this->parse_cache_built) {
  2181             $this->build_parse_cache();
  2182         }
  2183 
  2184         // Initialise various stuff
  2185         $length           = strlen($code);
  2186         $COMMENT_MATCHED  = false;
  2187         $stuff_to_parse   = '';
  2188         $endresult        = '';
  2189 
  2190         // "Important" selections are handled like multiline comments
  2191         // @todo GET RID OF THIS SHIZ
  2192         if ($this->enable_important_blocks) {
  2193             $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
  2194         }
  2195 
  2196         if ($this->strict_mode) {
  2197             // Break the source into bits. Each bit will be a portion of the code
  2198             // within script delimiters - for example, HTML between < and >
  2199             $k = 0;
  2200             $parts = array();
  2201             $matches = array();
  2202             $next_match_pointer = null;
  2203             // we use a copy to unset delimiters on demand (when they are not found)
  2204             $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
  2205             $i = 0;
  2206             while ($i < $length) {
  2207                 $next_match_pos = $length + 1; // never true
  2208                 foreach ($delim_copy as $dk => $delimiters) {
  2209                     if(is_array($delimiters)) {
  2210                         foreach ($delimiters as $open => $close) {
  2211                             // make sure the cache is setup properly
  2212                             if (!isset($matches[$dk][$open])) {
  2213                                 $matches[$dk][$open] = array(
  2214                                     'next_match' => -1,
  2215                                     'dk' => $dk,
  2216 
  2217                                     'open' => $open, // needed for grouping of adjacent code blocks (see below)
  2218                                     'open_strlen' => strlen($open),
  2219 
  2220                                     'close' => $close,
  2221                                     'close_strlen' => strlen($close),
  2222                                 );
  2223                             }
  2224                             // Get the next little bit for this opening string
  2225                             if ($matches[$dk][$open]['next_match'] < $i) {
  2226                                 // only find the next pos if it was not already cached
  2227                                 $open_pos = strpos($code, $open, $i);
  2228                                 if ($open_pos === false) {
  2229                                     // no match for this delimiter ever
  2230                                     unset($delim_copy[$dk][$open]);
  2231                                     continue;
  2232                                 }
  2233                                 $matches[$dk][$open]['next_match'] = $open_pos;
  2234                             }
  2235                             if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
  2236                                 //So we got a new match, update the close_pos
  2237                                 $matches[$dk][$open]['close_pos'] =
  2238                                     strpos($code, $close, $matches[$dk][$open]['next_match']+1);
  2239 
  2240                                 $next_match_pointer =& $matches[$dk][$open];
  2241                                 $next_match_pos = $matches[$dk][$open]['next_match'];
  2242                             }
  2243                         }
  2244                     } else {
  2245                         //So we should match a RegExp as Strict Block ...
  2246                         /**
  2247                          * The value in $delimiters is expected to be a RegExp
  2248                          * containing exactly 2 matching groups:
  2249                          *  - Group 1 is the opener
  2250                          *  - Group 2 is the closer
  2251                          */
  2252                         if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work.
  2253                             preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
  2254                             //We got a match ...
  2255                             if(isset($matches_rx['start']) && isset($matches_rx['end']))
  2256                             {
  2257                                 $matches[$dk] = array(
  2258                                     'next_match' => $matches_rx['start'][1],
  2259                                     'dk' => $dk,
  2260 
  2261                                     'close_strlen' => strlen($matches_rx['end'][0]),
  2262                                     'close_pos' => $matches_rx['end'][1],
  2263                                     );
  2264                             } else {
  2265                                 $matches[$dk] = array(
  2266                                     'next_match' => $matches_rx[1][1],
  2267                                     'dk' => $dk,
  2268 
  2269                                     'close_strlen' => strlen($matches_rx[2][0]),
  2270                                     'close_pos' => $matches_rx[2][1],
  2271                                     );
  2272                             }
  2273                         } else {
  2274                             // no match for this delimiter ever
  2275                             unset($delim_copy[$dk]);
  2276                             continue;
  2277                         }
  2278 
  2279                         if ($matches[$dk]['next_match'] <= $next_match_pos) {
  2280                             $next_match_pointer =& $matches[$dk];
  2281                             $next_match_pos = $matches[$dk]['next_match'];
  2282                         }
  2283                     }
  2284                 }
  2285 
  2286                 // non-highlightable text
  2287                 $parts[$k] = array(
  2288                     1 => substr($code, $i, $next_match_pos - $i)
  2289                 );
  2290                 ++$k;
  2291 
  2292                 if ($next_match_pos > $length) {
  2293                     // out of bounds means no next match was found
  2294                     break;
  2295                 }
  2296 
  2297                 // highlightable code
  2298                 $parts[$k][0] = $next_match_pointer['dk'];
  2299 
  2300                 //Only combine for non-rx script blocks
  2301                 if(is_array($delim_copy[$next_match_pointer['dk']])) {
  2302                     // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
  2303                     $i = $next_match_pos + $next_match_pointer['open_strlen'];
  2304                     while (true) {
  2305                         $close_pos = strpos($code, $next_match_pointer['close'], $i);
  2306                         if ($close_pos == false) {
  2307                             break;
  2308                         }
  2309                         $i = $close_pos + $next_match_pointer['close_strlen'];
  2310                         if ($i == $length) {
  2311                             break;
  2312                         }
  2313                         if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
  2314                             substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
  2315                             // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
  2316                             foreach ($matches as $submatches) {
  2317                                 foreach ($submatches as $match) {
  2318                                     if ($match['next_match'] == $i) {
  2319                                         // a different block already matches here!
  2320                                         break 3;
  2321                                     }
  2322                                 }
  2323                             }
  2324                         } else {
  2325                             break;
  2326                         }
  2327                     }
  2328                 } else {
  2329                     $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
  2330                     $i = $close_pos;
  2331                 }
  2332 
  2333                 if ($close_pos === false) {
  2334                     // no closing delimiter found!
  2335                     $parts[$k][1] = substr($code, $next_match_pos);
  2336                     ++$k;
  2337                     break;
  2338                 } else {
  2339                     $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
  2340                     ++$k;
  2341                 }
  2342             }
  2343             unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
  2344             $num_parts = $k;
  2345 
  2346             if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
  2347                 // when we have only one part, we don't have anything to highlight at all.
  2348                 // if we have a "maybe" strict language, this should be handled as highlightable code
  2349                 $parts = array(
  2350                     0 => array(
  2351                         0 => '',
  2352                         1 => ''
  2353                     ),
  2354                     1 => array(
  2355                         0 => null,
  2356                         1 => $parts[0][1]
  2357                     )
  2358                 );
  2359                 $num_parts = 2;
  2360             }
  2361 
  2362         } else {
  2363             // Not strict mode - simply dump the source into
  2364             // the array at index 1 (the first highlightable block)
  2365             $parts = array(
  2366                 0 => array(
  2367                     0 => '',
  2368                     1 => ''
  2369                 ),
  2370                 1 => array(
  2371                     0 => null,
  2372                     1 => $code
  2373                 )
  2374             );
  2375             $num_parts = 2;
  2376         }
  2377 
  2378         //Unset variables we won't need any longer
  2379         unset($code);
  2380 
  2381         //Preload some repeatedly used values regarding hardquotes ...
  2382         $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
  2383         $hq_strlen = strlen($hq);
  2384 
  2385         //Preload if line numbers are to be generated afterwards
  2386         //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
  2387         $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
  2388             !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
  2389 
  2390         //preload the escape char for faster checking ...
  2391         $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
  2392 
  2393         // this is used for single-line comments
  2394         $sc_disallowed_before = "";
  2395         $sc_disallowed_after = "";
  2396 
  2397         if (isset($this->language_data['PARSER_CONTROL'])) {
  2398             if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
  2399                 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
  2400                     $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
  2401                 }
  2402                 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
  2403                     $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
  2404                 }
  2405             }
  2406         }
  2407 
  2408         //Fix for SF#1932083: Multichar Quotemarks unsupported
  2409         $is_string_starter = array();
  2410         if ($this->lexic_permissions['STRINGS']) {
  2411             foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
  2412                 if (!isset($is_string_starter[$quotemark[0]])) {
  2413                     $is_string_starter[$quotemark[0]] = (string)$quotemark;
  2414                 } elseif (is_string($is_string_starter[$quotemark[0]])) {
  2415                     $is_string_starter[$quotemark[0]] = array(
  2416                         $is_string_starter[$quotemark[0]],
  2417                         $quotemark);
  2418                 } else {
  2419                     $is_string_starter[$quotemark[0]][] = $quotemark;
  2420                 }
  2421             }
  2422         }
  2423 
  2424         // Now we go through each part. We know that even-indexed parts are
  2425         // code that shouldn't be highlighted, and odd-indexed parts should
  2426         // be highlighted
  2427         for ($key = 0; $key < $num_parts; ++$key) {
  2428             $STRICTATTRS = '';
  2429 
  2430             // If this block should be highlighted...
  2431             if (!($key & 1)) {
  2432                 // Else not a block to highlight
  2433                 $endresult .= $this->hsc($parts[$key][1]);
  2434                 unset($parts[$key]);
  2435                 continue;
  2436             }
  2437 
  2438             $result = '';
  2439             $part = $parts[$key][1];
  2440 
  2441             $highlight_part = true;
  2442             if ($this->strict_mode && !is_null($parts[$key][0])) {
  2443                 // get the class key for this block of code
  2444                 $script_key = $parts[$key][0];
  2445                 $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
  2446                 if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
  2447                     $this->lexic_permissions['SCRIPT']) {
  2448                     // Add a span element around the source to
  2449                     // highlight the overall source block
  2450                     if (!$this->use_classes &&
  2451                         $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
  2452                         $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
  2453                     } else {
  2454                         $attributes = ' class="sc' . $script_key . '"';
  2455                     }
  2456                     $result .= "<span$attributes>";
  2457                     $STRICTATTRS = $attributes;
  2458                 }
  2459             }
  2460 
  2461             if ($highlight_part) {
  2462                 // Now, highlight the code in this block. This code
  2463                 // is really the engine of GeSHi (along with the method
  2464                 // parse_non_string_part).
  2465 
  2466                 // cache comment regexps incrementally
  2467                 $next_comment_regexp_key = '';
  2468                 $next_comment_regexp_pos = -1;
  2469                 $next_comment_multi_pos = -1;
  2470                 $next_comment_single_pos = -1;
  2471                 $comment_regexp_cache_per_key = array();
  2472                 $comment_multi_cache_per_key = array();
  2473                 $comment_single_cache_per_key = array();
  2474                 $next_open_comment_multi = '';
  2475                 $next_comment_single_key = '';
  2476                 $escape_regexp_cache_per_key = array();
  2477                 $next_escape_regexp_key = '';
  2478                 $next_escape_regexp_pos = -1;
  2479 
  2480                 $length = strlen($part);
  2481                 for ($i = 0; $i < $length; ++$i) {
  2482                     // Get the next char
  2483                     $char = $part[$i];
  2484                     $char_len = 1;
  2485 
  2486                     // update regexp comment cache if needed
  2487                     if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
  2488                         $next_comment_regexp_pos = $length;
  2489                         foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
  2490                             $match_i = false;
  2491                             if (isset($comment_regexp_cache_per_key[$comment_key]) &&
  2492                                 ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
  2493                                  $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
  2494                                 // we have already matched something
  2495                                 if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
  2496                                     // this comment is never matched
  2497                                     continue;
  2498                                 }
  2499                                 $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
  2500                             } elseif (
  2501                                 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
  2502                                 (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) ||
  2503                                 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
  2504                                 ) {
  2505                                 $match_i = $match[0][1];
  2506                                 if (GESHI_PHP_PRE_433) {
  2507                                     $match_i += $i;
  2508                                 }
  2509 
  2510                                 $comment_regexp_cache_per_key[$comment_key] = array(
  2511                                     'key' => $comment_key,
  2512                                     'length' => strlen($match[0][0]),
  2513                                     'pos' => $match_i
  2514                                 );
  2515                             } else {
  2516                                 $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
  2517                                 continue;
  2518                             }
  2519 
  2520                             if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
  2521                                 $next_comment_regexp_pos = $match_i;
  2522                                 $next_comment_regexp_key = $comment_key;
  2523                                 if ($match_i === $i) {
  2524                                     break;
  2525                                 }
  2526                             }
  2527                         }
  2528                     }
  2529 
  2530                     $string_started = false;
  2531 
  2532                     if (isset($is_string_starter[$char])) {
  2533                         // Possibly the start of a new string ...
  2534 
  2535                         //Check which starter it was ...
  2536                         //Fix for SF#1932083: Multichar Quotemarks unsupported
  2537                         if (is_array($is_string_starter[$char])) {
  2538                             $char_new = '';
  2539                             foreach ($is_string_starter[$char] as $testchar) {
  2540                                 if ($testchar === substr($part, $i, strlen($testchar)) &&
  2541                                     strlen($testchar) > strlen($char_new)) {
  2542                                     $char_new = $testchar;
  2543                                     $string_started = true;
  2544                                 }
  2545                             }
  2546                             if ($string_started) {
  2547                                 $char = $char_new;
  2548                             }
  2549                         } else {
  2550                             $testchar = $is_string_starter[$char];
  2551                             if ($testchar === substr($part, $i, strlen($testchar))) {
  2552                                 $char = $testchar;
  2553                                 $string_started = true;
  2554                             }
  2555                         }
  2556                         $char_len = strlen($char);
  2557                     }
  2558 
  2559                     if ($string_started && ($i != $next_comment_regexp_pos)) {
  2560                         // Hand out the correct style information for this string
  2561                         $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
  2562                         if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
  2563                             !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
  2564                             $string_key = 0;
  2565                         }
  2566 
  2567                         // parse the stuff before this
  2568                         $result .= $this->parse_non_string_part($stuff_to_parse);
  2569                         $stuff_to_parse = '';
  2570 
  2571                         if (!$this->use_classes) {
  2572                             $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
  2573                         } else {
  2574                             $string_attributes = ' class="st'.$string_key.'"';
  2575                         }
  2576 
  2577                         // now handle the string
  2578                         $string = "<span$string_attributes>" . GeSHi::hsc($char);
  2579                         $start = $i + $char_len;
  2580                         $string_open = true;
  2581 
  2582                         if(empty($this->language_data['ESCAPE_REGEXP'])) {
  2583                             $next_escape_regexp_pos = $length;
  2584                         }
  2585 
  2586                         do {
  2587                             //Get the regular ending pos ...
  2588                             $close_pos = strpos($part, $char, $start);
  2589                             if(false === $close_pos) {
  2590                                 $close_pos = $length;
  2591                             }
  2592 
  2593                             if($this->lexic_permissions['ESCAPE_CHAR']) {
  2594                                 // update escape regexp cache if needed
  2595                                 if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
  2596                                     $next_escape_regexp_pos = $length;
  2597                                     foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
  2598                                         $match_i = false;
  2599                                         if (isset($escape_regexp_cache_per_key[$escape_key]) &&
  2600                                             ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
  2601                                              $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
  2602                                             // we have already matched something
  2603                                             if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
  2604                                                 // this escape regexp is never matched
  2605                                                 continue;
  2606                                             }
  2607                                             $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
  2608                                         } elseif (
  2609                                             //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
  2610                                             (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) ||
  2611                                             (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start))
  2612                                             ) {
  2613                                             $match_i = $match[0][1];
  2614                                             if (GESHI_PHP_PRE_433) {
  2615                                                 $match_i += $start;
  2616                                             }
  2617 
  2618                                             $escape_regexp_cache_per_key[$escape_key] = array(
  2619                                                 'key' => $escape_key,
  2620                                                 'length' => strlen($match[0][0]),
  2621                                                 'pos' => $match_i
  2622                                             );
  2623                                         } else {
  2624                                             $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
  2625                                             continue;
  2626                                         }
  2627 
  2628                                         if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
  2629                                             $next_escape_regexp_pos = $match_i;
  2630                                             $next_escape_regexp_key = $escape_key;
  2631                                             if ($match_i === $start) {
  2632                                                 break;
  2633                                             }
  2634                                         }
  2635                                     }
  2636                                 }
  2637 
  2638                                 //Find the next simple escape position
  2639                                 if('' != $this->language_data['ESCAPE_CHAR']) {
  2640                                     $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
  2641                                     if(false === $simple_escape) {
  2642                                         $simple_escape = $length;
  2643                                     }
  2644                                 } else {
  2645                                     $simple_escape = $length;
  2646                                 }
  2647                             } else {
  2648                                 $next_escape_regexp_pos = $length;
  2649                                 $simple_escape = $length;
  2650                             }
  2651 
  2652                             if($simple_escape < $next_escape_regexp_pos &&
  2653                                 $simple_escape < $length &&
  2654                                 $simple_escape < $close_pos) {
  2655                                 //The next escape sequence is a simple one ...
  2656                                 $es_pos = $simple_escape;
  2657 
  2658                                 //Add the stuff not in the string yet ...
  2659                                 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
  2660 
  2661                                 //Get the style for this escaped char ...
  2662                                 if (!$this->use_classes) {
  2663                                     $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
  2664                                 } else {
  2665                                     $escape_char_attributes = ' class="es0"';
  2666                                 }
  2667 
  2668                                 //Add the style for the escape char ...
  2669                                 $string .= "<span$escape_char_attributes>" .
  2670                                     GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
  2671 
  2672                                 //Get the byte AFTER the ESCAPE_CHAR we just found
  2673                                 $es_char = $part[$es_pos + 1];
  2674                                 if ($es_char == "\n") {
  2675                                     // don't put a newline around newlines
  2676                                     $string .= "</span>\n";
  2677                                     $start = $es_pos + 2;
  2678                                 } elseif (ord($es_char) >= 128) {
  2679                                     //This is a non-ASCII char (UTF8 or single byte)
  2680                                     //This code tries to work around SF#2037598 ...
  2681                                     if(function_exists('mb_substr')) {
  2682                                         $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
  2683                                         $string .= $es_char_m . '</span>';
  2684                                     } elseif (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) {
  2685                                         if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
  2686                                             "|\xE0[\xA0-\xBF][\x80-\xBF]".
  2687                                             "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
  2688                                             "|\xED[\x80-\x9F][\x80-\xBF]".
  2689                                             "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
  2690                                             "|[\xF1-\xF3][\x80-\xBF]{3}".
  2691                                             "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
  2692                                             $part, $es_char_m, null, $es_pos + 1)) {
  2693                                             $es_char_m = $es_char_m[0];
  2694                                         } else {
  2695                                             $es_char_m = $es_char;
  2696                                         }
  2697                                         $string .= $this->hsc($es_char_m) . '</span>';
  2698                                     } else {
  2699                                         $es_char_m = $this->hsc($es_char);
  2700                                     }
  2701                                     $start = $es_pos + strlen($es_char_m) + 1;
  2702                                 } else {
  2703                                     $string .= $this->hsc($es_char) . '</span>';
  2704                                     $start = $es_pos + 2;
  2705                                 }
  2706                             } elseif ($next_escape_regexp_pos < $length &&
  2707                                 $next_escape_regexp_pos < $close_pos) {
  2708                                 $es_pos = $next_escape_regexp_pos;
  2709                                 //Add the stuff not in the string yet ...
  2710                                 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
  2711 
  2712                                 //Get the key and length of this match ...
  2713                                 $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
  2714                                 $escape_str = substr($part, $es_pos, $escape['length']);
  2715                                 $escape_key = $escape['key'];
  2716 
  2717                                 //Get the style for this escaped char ...
  2718                                 if (!$this->use_classes) {
  2719                                     $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
  2720                                 } else {
  2721                                     $escape_char_attributes = ' class="es' . $escape_key . '"';
  2722                                 }
  2723 
  2724                                 //Add the style for the escape char ...
  2725                                 $string .= "<span$escape_char_attributes>" .
  2726                                     $this->hsc($escape_str) . '</span>';
  2727 
  2728                                 $start = $es_pos + $escape['length'];
  2729                             } else {
  2730                                 //Copy the remainder of the string ...
  2731                                 $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
  2732                                 $start = $close_pos + $char_len;
  2733                                 $string_open = false;
  2734                             }
  2735                         } while($string_open);
  2736 
  2737                         if ($check_linenumbers) {
  2738                             // Are line numbers used? If so, we should end the string before
  2739                             // the newline and begin it again (so when <li>s are put in the source
  2740                             // remains XHTML compliant)
  2741                             // note to self: This opens up possibility of config files specifying
  2742                             // that languages can/cannot have multiline strings???
  2743                             $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
  2744                         }
  2745 
  2746                         $result .= $string;
  2747                         $string = '';
  2748                         $i = $start - 1;
  2749                         continue;
  2750                     } elseif ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
  2751                         substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
  2752                         // The start of a hard quoted string
  2753                         if (!$this->use_classes) {
  2754                             $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
  2755                             $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
  2756                         } else {
  2757                             $string_attributes = ' class="st_h"';
  2758                             $escape_char_attributes = ' class="es_h"';
  2759                         }
  2760                         // parse the stuff before this
  2761                         $result .= $this->parse_non_string_part($stuff_to_parse);
  2762                         $stuff_to_parse = '';
  2763 
  2764                         // now handle the string
  2765                         $string = '';
  2766 
  2767                         // look for closing quote
  2768                         $start = $i + $hq_strlen;
  2769                         while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
  2770                             $start = $close_pos + 1;
  2771                             if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR'] &&
  2772                                 (($i + $hq_strlen) != ($close_pos))) { //Support empty string for HQ escapes if Starter = Escape
  2773                                 // make sure this quote is not escaped
  2774                                 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
  2775                                     if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
  2776                                         // check wether this quote is escaped or if it is something like '\\'
  2777                                         $escape_char_pos = $close_pos - 1;
  2778                                         while ($escape_char_pos > 0
  2779                                                 && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
  2780                                             --$escape_char_pos;
  2781                                         }
  2782                                         if (($close_pos - $escape_char_pos) & 1) {
  2783                                             // uneven number of escape chars => this quote is escaped
  2784                                             continue 2;
  2785                                         }
  2786                                     }
  2787                                 }
  2788                             }
  2789 
  2790                             // found closing quote
  2791                             break;
  2792                         }
  2793 
  2794                         //Found the closing delimiter?
  2795                         if (!$close_pos) {
  2796                             // span till the end of this $part when no closing delimiter is found
  2797                             $close_pos = $length;
  2798                         }
  2799 
  2800                         //Get the actual string
  2801                         $string = substr($part, $i, $close_pos - $i + 1);
  2802                         $i = $close_pos;
  2803 
  2804                         // handle escape chars and encode html chars
  2805                         // (special because when we have escape chars within our string they may not be escaped)
  2806                         if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
  2807                             $start = 0;
  2808                             $new_string = '';
  2809                             while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
  2810                                 // hmtl escape stuff before
  2811                                 $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
  2812                                 // check if this is a hard escape
  2813                                 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
  2814                                     if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
  2815                                         // indeed, this is a hardescape
  2816                                         $new_string .= "<span$escape_char_attributes>" .
  2817                                             $this->hsc($hardescape) . '</span>';
  2818                                         $start = $es_pos + strlen($hardescape);
  2819                                         continue 2;
  2820                                     }
  2821                                 }
  2822                                 // not a hard escape, but a normal escape
  2823                                 // they come in pairs of two
  2824                                 $c = 0;
  2825                                 while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
  2826                                     && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
  2827                                     && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
  2828                                     $c += 2;
  2829                                 }
  2830                                 if ($c) {
  2831                                     $new_string .= "<span$escape_char_attributes>" .
  2832                                         str_repeat($escaped_escape_char, $c) .
  2833                                         '</span>';
  2834                                     $start = $es_pos + $c;
  2835                                 } else {
  2836                                     // this is just a single lonely escape char...
  2837                                     $new_string .= $escaped_escape_char;
  2838                                     $start = $es_pos + 1;
  2839                                 }
  2840                             }
  2841                             $string = $new_string . $this->hsc(substr($string, $start));
  2842                         } else {
  2843                             $string = $this->hsc($string);
  2844                         }
  2845 
  2846                         if ($check_linenumbers) {
  2847                             // Are line numbers used? If, we should end the string before
  2848                             // the newline and begin it again (so when <li>s are put in the source
  2849                             // remains XHTML compliant)
  2850                             // note to self: This opens up possibility of config files specifying
  2851                             // that languages can/cannot have multiline strings???
  2852                             $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
  2853                         }
  2854 
  2855                         $result .= "<span$string_attributes>" . $string . '</span>';
  2856                         $string = '';
  2857                         continue;
  2858                     } else {
  2859                         //Have a look for regexp comments
  2860                         if ($i == $next_comment_regexp_pos) {
  2861                             $COMMENT_MATCHED = true;
  2862                             $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
  2863                             $test_str = $this->hsc(substr($part, $i, $comment['length']));
  2864 
  2865                             //@todo If remove important do remove here
  2866                             if ($this->lexic_permissions['COMMENTS']['MULTI']) {
  2867                                 if (!$this->use_classes) {
  2868                                     $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
  2869                                 } else {
  2870                                     $attributes = ' class="co' . $comment['key'] . '"';
  2871                                 }
  2872 
  2873                                 $test_str = "<span$attributes>" . $test_str . "</span>";
  2874 
  2875                                 // Short-cut through all the multiline code
  2876                                 if ($check_linenumbers) {
  2877                                     // strreplace to put close span and open span around multiline newlines
  2878                                     $test_str = str_replace(
  2879                                         "\n", "</span>\n<span$attributes>",
  2880                                         str_replace("\n ", "\n&nbsp;", $test_str)
  2881                                     );
  2882                                 }
  2883                             }
  2884 
  2885                             $i += $comment['length'] - 1;
  2886 
  2887                             // parse the rest
  2888                             $result .= $this->parse_non_string_part($stuff_to_parse);
  2889                             $stuff_to_parse = '';
  2890                         }
  2891 
  2892                         // If we haven't matched a regexp comment, try multi-line comments
  2893                         if (!$COMMENT_MATCHED) {
  2894                             // Is this a multiline comment?
  2895                             if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
  2896                                 $next_comment_multi_pos = $length;
  2897                                 foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
  2898                                     $match_i = false;
  2899                                     if (isset($comment_multi_cache_per_key[$open]) &&
  2900                                         ($comment_multi_cache_per_key[$open] >= $i ||
  2901                                          $comment_multi_cache_per_key[$open] === false)) {
  2902                                         // we have already matched something
  2903                                         if ($comment_multi_cache_per_key[$open] === false) {
  2904                                             // this comment is never matched
  2905                                             continue;
  2906                                         }
  2907                                         $match_i = $comment_multi_cache_per_key[$open];
  2908                                     } elseif (($match_i = stripos($part, $open, $i)) !== false) {
  2909                                         $comment_multi_cache_per_key[$open] = $match_i;
  2910                                     } else {
  2911                                         $comment_multi_cache_per_key[$open] = false;
  2912                                         continue;
  2913                                     }
  2914                                     if ($match_i !== false && $match_i < $next_comment_multi_pos) {
  2915                                         $next_comment_multi_pos = $match_i;
  2916                                         $next_open_comment_multi = $open;
  2917                                         if ($match_i === $i) {
  2918                                             break;
  2919                                         }
  2920                                     }
  2921                                 }
  2922                             }
  2923                             if ($i == $next_comment_multi_pos) {
  2924                                 $open = $next_open_comment_multi;
  2925                                 $close = $this->language_data['COMMENT_MULTI'][$open];
  2926                                 $open_strlen = strlen($open);
  2927                                 $close_strlen = strlen($close);
  2928                                 $COMMENT_MATCHED = true;
  2929                                 $test_str_match = $open;
  2930                                 //@todo If remove important do remove here
  2931                                 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
  2932                                     $open == GESHI_START_IMPORTANT) {
  2933                                     if ($open != GESHI_START_IMPORTANT) {
  2934                                         if (!$this->use_classes) {
  2935                                             $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
  2936                                         } else {
  2937                                             $attributes = ' class="coMULTI"';
  2938                                         }
  2939                                         $test_str = "<span$attributes>" . $this->hsc($open);
  2940                                     } else {
  2941                                         if (!$this->use_classes) {
  2942                                             $attributes = ' style="' . $this->important_styles . '"';
  2943                                         } else {
  2944                                             $attributes = ' class="imp"';
  2945                                         }
  2946 
  2947                                         // We don't include the start of the comment if it's an
  2948                                         // "important" part
  2949                                         $test_str = "<span$attributes>";
  2950                                     }
  2951                                 } else {
  2952                                     $test_str = $this->hsc($open);
  2953                                 }
  2954 
  2955                                 $close_pos = strpos( $part, $close, $i + $open_strlen );
  2956 
  2957                                 if ($close_pos === false) {
  2958                                     $close_pos = $length;
  2959                                 }
  2960 
  2961                                 // Short-cut through all the multiline code
  2962                                 $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
  2963                                 if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
  2964                                     $test_str_match == GESHI_START_IMPORTANT) &&
  2965                                     $check_linenumbers) {
  2966 
  2967                                     // strreplace to put close span and open span around multiline newlines
  2968                                     $test_str .= str_replace(
  2969                                         "\n", "</span>\n<span$attributes>",
  2970                                         str_replace("\n ", "\n&nbsp;", $rest_of_comment)
  2971                                     );
  2972                                 } else {
  2973                                     $test_str .= $rest_of_comment;
  2974                                 }
  2975 
  2976                                 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
  2977                                     $test_str_match == GESHI_START_IMPORTANT) {
  2978                                     $test_str .= '</span>';
  2979                                 }
  2980 
  2981                                 $i = $close_pos + $close_strlen - 1;
  2982 
  2983                                 // parse the rest
  2984                                 $result .= $this->parse_non_string_part($stuff_to_parse);
  2985                                 $stuff_to_parse = '';
  2986                             }
  2987                         }
  2988 
  2989                         // If we haven't matched a multiline comment, try single-line comments
  2990                         if (!$COMMENT_MATCHED) {
  2991                             // cache potential single line comment occurances
  2992                             if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
  2993                                 $next_comment_single_pos = $length;
  2994                                 foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
  2995                                     $match_i = false;
  2996                                     if (isset($comment_single_cache_per_key[$comment_key]) &&
  2997                                         ($comment_single_cache_per_key[$comment_key] >= $i ||
  2998                                          $comment_single_cache_per_key[$comment_key] === false)) {
  2999                                         // we have already matched something
  3000                                         if ($comment_single_cache_per_key[$comment_key] === false) {
  3001                                             // this comment is never matched
  3002                                             continue;
  3003                                         }
  3004                                         $match_i = $comment_single_cache_per_key[$comment_key];
  3005                                     } elseif (
  3006                                         // case sensitive comments
  3007                                         ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
  3008                                         ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
  3009                                         // non case sensitive
  3010                                         (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
  3011                                           (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
  3012                                         $comment_single_cache_per_key[$comment_key] = $match_i;
  3013                                     } else {
  3014                                         $comment_single_cache_per_key[$comment_key] = false;
  3015                                         continue;
  3016                                     }
  3017                                     if ($match_i !== false && $match_i < $next_comment_single_pos) {
  3018                                         $next_comment_single_pos = $match_i;
  3019                                         $next_comment_single_key = $comment_key;
  3020                                         if ($match_i === $i) {
  3021                                             break;
  3022                                         }
  3023                                     }
  3024                                 }
  3025                             }
  3026                             if ($next_comment_single_pos == $i) {
  3027                                 $comment_key = $next_comment_single_key;
  3028                                 $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
  3029                                 $com_len = strlen($comment_mark);
  3030 
  3031                                 // This check will find special variables like $# in bash
  3032                                 // or compiler directives of Delphi beginning {$
  3033                                 if ((empty($sc_disallowed_before) || ($i == 0) ||
  3034                                     (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
  3035                                     (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
  3036                                     (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
  3037                                 {
  3038                                     // this is a valid comment
  3039                                     $COMMENT_MATCHED = true;
  3040                                     if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
  3041                                         if (!$this->use_classes) {
  3042                                             $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
  3043                                         } else {
  3044                                             $attributes = ' class="co' . $comment_key . '"';
  3045                                         }
  3046                                         $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
  3047                                     } else {
  3048                                         $test_str = $this->hsc($comment_mark);
  3049                                     }
  3050 
  3051                                     //Check if this comment is the last in the source
  3052                                     $close_pos = strpos($part, "\n", $i);
  3053                                     $oops = false;
  3054                                     if ($close_pos === false) {
  3055                                         $close_pos = $length;
  3056                                         $oops = true;
  3057                                     }
  3058                                     $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
  3059                                     if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
  3060                                         $test_str .= "</span>";
  3061                                     }
  3062 
  3063                                     // Take into account that the comment might be the last in the source
  3064                                     if (!$oops) {
  3065                                       $test_str .= "\n";
  3066                                     }
  3067 
  3068                                     $i = $close_pos;
  3069 
  3070                                     // parse the rest
  3071                                     $result .= $this->parse_non_string_part($stuff_to_parse);
  3072                                     $stuff_to_parse = '';
  3073                                 }
  3074                             }
  3075                         }
  3076                     }
  3077 
  3078                     // Where are we adding this char?
  3079                     if (!$COMMENT_MATCHED) {
  3080                         $stuff_to_parse .= $char;
  3081                     } else {
  3082                         $result .= $test_str;
  3083                         unset($test_str);
  3084                         $COMMENT_MATCHED = false;
  3085                     }
  3086                 }
  3087                 // Parse the last bit
  3088                 $result .= $this->parse_non_string_part($stuff_to_parse);
  3089                 $stuff_to_parse = '';
  3090             } else {
  3091                 $result .= $this->hsc($part);
  3092             }
  3093             // Close the <span> that surrounds the block
  3094             if ($STRICTATTRS != '') {
  3095                 $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
  3096                 $result .= '</span>';
  3097             }
  3098 
  3099             $endresult .= $result;
  3100             unset($part, $parts[$key], $result);
  3101         }
  3102 
  3103         //This fix is related to SF#1923020, but has to be applied regardless of
  3104         //actually highlighting symbols.
  3105         /** NOTE: memorypeak #3 */
  3106         $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
  3107 
  3108 //        // Parse the last stuff (redundant?)
  3109 //        $result .= $this->parse_non_string_part($stuff_to_parse);
  3110 
  3111         // Lop off the very first and last spaces
  3112 //        $result = substr($result, 1, -1);
  3113 
  3114         // We're finished: stop timing
  3115         $this->set_time($start_time, microtime());
  3116 
  3117         $this->finalise($endresult);
  3118         return $endresult;
  3119     }
  3120 
    /**
     * Swaps out spaces and tabs for HTML indentation. Not needed if
     * the code is in a pre block...
     *
     * Works in three stages on the string passed by reference:
     *  1. If the text contains tabs, every line holding a tab is rebuilt
     *     character by character, expanding each tab to the next tab stop
     *     (width taken from get_real_tab_width()). HTML tags are copied
     *     without counting as columns and an HTML entity counts as one column.
     *  2. A space at the start of a line becomes '&nbsp;' and every pair of
     *     spaces becomes ' &nbsp;'.
     *  3. When line numbers are off and the header type is not
     *     GESHI_HEADER_PRE_TABLE, newlines are either passed through nl2br()
     *     or replaced by $this->line_ending.
     *
     * @param  string $result The source to indent; modified in place (reference!)
     * @since  1.0.0
     * @access private
     */
    function indent(&$result) {
        /// Replace tabs with the correct number of spaces
        if (false !== strpos($result, "\t")) {
            $lines = explode("\n", $result);
            $result = null;//Save memory while we process the lines individually
            $tab_width = $this->get_real_tab_width();
            // Template the tab replacements are sliced from: one '&nbsp;'
            // (6 bytes) followed by $tab_width plain spaces.
            $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);

            for ($key = 0, $n = count($lines); $key < $n; $key++) {
                $line = $lines[$key];
                // Lines without a tab are left exactly as they are
                if (false === strpos($line, "\t")) {
                    continue;
                }

                // $pos is the visible column reached so far (markup excluded);
                // $lines[$key] is emptied and rebuilt from $line below.
                $pos = 0;
                $length = strlen($line);
                $lines[$key] = ''; // reduce memory

                $IN_TAG = false;
                for ($i = 0; $i < $length; ++$i) {
                    $char = $line[$i];
                    // Simple engine to work out whether we're in a tag.
                    // If we are we modify $pos. This is so we ignore HTML
                    // in the line and only workout the tab replacement
                    // via the actual content of the string
                    // This test could be improved to include strings in the
                    // html so that < or > would be allowed in user's styles
                    // (e.g. quotes: '<' '>'; or similar)
                    if ($IN_TAG) {
                        // Inside a tag: copy verbatim, $pos is not advanced
                        if ('>' == $char) {
                            $IN_TAG = false;
                        }
                        $lines[$key] .= $char;
                    } elseif ('<' == $char) {
                        $IN_TAG = true;
                        $lines[$key] .= '<';
                    } elseif ('&' == $char) {
                        // Possible HTML entity: look for the terminating ';'
                        // within the 5 characters starting 3 after the '&'.
                        // If found at offset $posi, the $posi + 3 characters
                        // following the '&' will each add one to $pos in the
                        // final else branch, so subtracting $posi + 2 here
                        // makes the whole entity count as a single column.
                        // If no ';' is found the '&' counts as one column.
                        $substr = substr($line, $i + 3, 5);
                        $posi = strpos($substr, ';');
                        if (false === $posi) {
                            ++$pos;
                        } else {
                            $pos -= $posi+2;
                        }
                        $lines[$key] .= $char;
                    } elseif ("\t" == $char) {
                        $str = '';
                        // OPTIMISE - move $strs out. Make an array:
                        // $tabs = array(
                        //  1 => '&nbsp;',
                        //  2 => '&nbsp; ',
                        //  3 => '&nbsp; &nbsp;' etc etc
                        // to use instead of building a string every time
                        $tab_end_width = $tab_width - ($pos % $tab_width); // Number of columns up to the next tab stop
                        if (($pos & 1) || 1 == $tab_end_width) {
                            // Odd column, or only one column to fill:
                            // emit $tab_end_width plain spaces (skip the '&nbsp;')
                            $str .= substr($tab_string, 6, $tab_end_width);
                        } else {
                            // Otherwise emit '&nbsp;' followed by
                            // $tab_end_width - 1 plain spaces
                            $str .= substr($tab_string, 0, $tab_end_width+5);
                        }
                        $lines[$key] .= $str;
                        $pos += $tab_end_width;

                        // No further tabs on this line: append the remainder
                        // unchanged and stop scanning it
                        if (false === strpos($line, "\t", $i + 1)) {
                            $lines[$key] .= substr($line, $i + 1);
                            break;
                        }
                    } elseif (0 == $pos && ' ' == $char) {
                        // A space in column 0 is emitted as '&nbsp;'
                        $lines[$key] .= '&nbsp;';
                        ++$pos;
                    } else {
                        // Any other character: copy it and advance one column
                        $lines[$key] .= $char;
                        ++$pos;
                    }
                }
            }
            $result = implode("\n", $lines);
            unset($lines);//We don't need the lines separated beyond this --- free them!
        }
        // Other whitespace
        // BenBE: Fix to reduce the number of replacements to be done
        // First a single space at the start of any line, then every pair of spaces
        $result = preg_replace('/^ /m', '&nbsp;', $result);
        $result = str_replace('  ', ' &nbsp;', $result);

        // Without line numbers (and outside the PRE_TABLE header type) the
        // newlines are converted here: nl2br() when no custom line ending is
        // set, otherwise each "\n" is replaced by the configured line ending.
        if ($this->line_numbers == GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
            if ($this->line_ending === null) {
                $result = nl2br($result);
            } else {
                $result = str_replace("\n", $this->line_ending, $result);
            }
        }
    }
  3220 
  3221     /**
  3222      * Changes the case of a keyword for those languages where a change is asked for
  3223      *
  3224      * @param  string The keyword to change the case of
  3225      * @return string The keyword with its case changed
  3226      * @since  1.0.0
  3227      * @access private
  3228      */
  3229     function change_case($instr) {
  3230         switch ($this->language_data['CASE_KEYWORDS']) {
  3231             case GESHI_CAPS_UPPER:
  3232                 return strtoupper($instr);
  3233             case GESHI_CAPS_LOWER:
  3234                 return strtolower($instr);
  3235             default:
  3236                 return $instr;
  3237         }
  3238     }
  3239 
  3240     /**
  3241      * Handles replacements of keywords to include markup and links if requested
  3242      *
  3243      * @param  string The keyword to add the Markup to
  3244      * @return The HTML for the match found
  3245      * @since  1.0.8
  3246      * @access private
  3247      *
  3248      * @todo   Get rid of ender in keyword links
  3249      */
  3250     function handle_keyword_replace($match) {
  3251         $k = $this->_kw_replace_group;
  3252         $keyword = $match[0];
  3253         $keyword_match = $match[1];
  3254 
  3255         $before = '';
  3256         $after = '';
  3257 
  3258         if ($this->keyword_links) {
  3259             // Keyword links have been ebabled
  3260 
  3261             if (isset($this->language_data['URLS'][$k]) &&
  3262                 $this->language_data['URLS'][$k] != '') {
  3263                 // There is a base group for this keyword
  3264 
  3265                 // Old system: strtolower
  3266                 //$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
  3267                 // New system: get keyword from language file to get correct case
  3268                 if (!$this->language_data['CASE_SENSITIVE'][$k] &&
  3269                     strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
  3270                     foreach ($this->language_data['KEYWORDS'][$k] as $word) {
  3271                         if (strcasecmp($word, $keyword_match) == 0) {
  3272                             break;
  3273                         }
  3274                     }
  3275                 } else {
  3276                     $word = $keyword_match;
  3277                 }
  3278 
  3279                 $before = '<|UR1|"' .
  3280                     str_replace(
  3281                         array(
  3282                             '{FNAME}',
  3283                             '{FNAMEL}',
  3284                             '{FNAMEU}',
  3285                             '.'),
  3286                         array(
  3287                             str_replace('+', '%20', urlencode($this->hsc($word))),
  3288                             str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
  3289                             str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
  3290                             '<DOT>'),
  3291                         $this->language_data['URLS'][$k]
  3292                     ) . '">';
  3293                 $after = '</a>';
  3294             }
  3295         }
  3296 
  3297         return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
  3298     }
  3299 
  3300     /**
  3301      * handles regular expressions highlighting-definitions with callback functions
  3302      *
  3303      * @note this is a callback, don't use it directly
  3304      *
  3305      * @param array the matches array
  3306      * @return The highlighted string
  3307      * @since 1.0.8
  3308      * @access private
  3309      */
  3310     function handle_regexps_callback($matches) {
  3311         // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
  3312         return  ' style="' . call_user_func($this->language_data['STYLES']['REGEXPS'][$this->_rx_key], $matches[1]) . '"'. $matches[1] . '|>';
  3313     }
  3314 
  3315     /**
  3316      * handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
  3317      *
  3318      * @note this is a callback, don't use it directly
  3319      *
  3320      * @param array the matches array
  3321      * @return string
  3322      * @since 1.0.8
  3323      * @access private
  3324      */
  3325     function handle_multiline_regexps($matches) {
  3326         $before = $this->_hmr_before;
  3327         $after = $this->_hmr_after;
  3328         if ($this->_hmr_replace) {
  3329             $replace = $this->_hmr_replace;
  3330             $search = array();
  3331 
  3332             foreach (array_keys($matches) as $k) {
  3333                 $search[] = '\\' . $k;
  3334             }
  3335 
  3336             $before = str_replace($search, $matches, $before);
  3337             $after = str_replace($search, $matches, $after);
  3338             $replace = str_replace($search, $matches, $replace);
  3339         } else {
  3340             $replace = $matches[0];
  3341         }
  3342         return $before
  3343                     . '<|!REG3XP' . $this->_hmr_key .'!>'
  3344                         . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace)
  3345                     . '|>'
  3346               . $after;
  3347     }
  3348 
    /**
     * Takes a string that has no strings or comments in it, and highlights
     * stuff like keywords, numbers and methods.
     *
     * The input is HTML-escaped with hsc() and then processed in passes:
     * keywords, language REGEXPS, numbers, object methods, brackets and
     * symbols. The passes emit placeholder markers ("<|", "|>", "<|UR1|",
     * "<DOT>", "!REG3XP<key>!", "/NUM!<id>/") instead of final HTML; the
     * placeholders are only turned into <span>/<a> markup at the very end.
     *
     * @param string The string to parse for keyword, numbers etc.
     * @return string The highlighted fragment
     * @since 1.0.0
     * @access private
     * @todo BUGGY! Why? Why not build string and return?
     */
    function parse_non_string_part($stuff_to_parse) {
        // A single space is prepended here and stripped again by the final
        // substr() - presumably so the keyword look-behind always has a
        // character to inspect at the start of the text.
        $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);

        // Highlight keywords
        // Default look-behind / look-ahead character classes; they are left
        // open here and closed with "])" after the quotemarks were appended.
        $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#|^&";
        $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
        if ($this->lexic_permissions['STRINGS']) {
            $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
            $disallowed_before .= $quotemarks;
            $disallowed_after .= $quotemarks;
        }
        $disallowed_before .= "])";
        $disallowed_after .= "])";

        // A language file may replace the defaults globally and/or per keyword group
        $parser_control_pergroup = false;
        if (isset($this->language_data['PARSER_CONTROL'])) {
            if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
                $x = 0; // check whether per-keyword-group parser_control is enabled
                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
                    $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
                    ++$x;
                }
                if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
                    $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
                    ++$x;
                }
                // Any entry besides the two global ones counted in $x is taken
                // to be a per-group override
                $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
            }
        }

        foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
            // A group without an explicit permission entry is highlighted
            if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
                $this->lexic_permissions['KEYWORDS'][$k]) {

                $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
                $modifiers = $case_sensitive ? '' : 'i';

                // NEW in 1.0.8 - per-keyword-group parser control
                $disallowed_before_local = $disallowed_before;
                $disallowed_after_local = $disallowed_after;
                if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
                    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
                        $disallowed_before_local =
                            $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
                    }

                    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
                        $disallowed_after_local =
                            $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
                    }
                }

                // handle_keyword_replace() reads the current group from this property
                $this->_kw_replace_group = $k;

                //NEW in 1.0.8, the cached regexp list
                // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
                for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set <  $set_length; ++$set) {
                    $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
                    // Might make a more unique string for putting the number in soon
                    // Basically, we don't put the styles in yet because then the styles themselves will
                    // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
                    // The (?!\<DOT\>...) look-ahead rejects a keyword directly followed by
                    // "<DOT>htm", "<DOT>php" or "<DOT>asp(x)" - presumably to keep keywords
                    // inside already inserted link URLs from being matched again.
                    $stuff_to_parse = preg_replace_callback(
                        "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php|aspx?))$disallowed_after_local/$modifiers",
                        array($this, 'handle_keyword_replace'),
                        $stuff_to_parse
                        );
                }
            }
        }

        // Regular expressions
        // Matches are wrapped in "<|!REG3XP<key>!>...|>" placeholders; the
        // "!REG3XP<key>!" part is swapped for a class/style attribute further down.
        foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
            if ($this->lexic_permissions['REGEXPS'][$key]) {
                if (is_array($regexp)) {
                    // Array form: search pattern, modifiers and before/replace/after templates
                    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                        // produce valid HTML when we match multiple lines
                        $this->_hmr_replace = $regexp[GESHI_REPLACE];
                        $this->_hmr_before = $regexp[GESHI_BEFORE];
                        $this->_hmr_key = $key;
                        $this->_hmr_after = $regexp[GESHI_AFTER];
                        $stuff_to_parse = preg_replace_callback(
                            "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
                            array($this, 'handle_multiline_regexps'),
                            $stuff_to_parse);
                        // Reset the callback state so the plain-string branch below
                        // does not see stale templates
                        $this->_hmr_replace = false;
                        $this->_hmr_before = '';
                        $this->_hmr_after = '';
                    } else {
                        $stuff_to_parse = preg_replace(
                            '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
                            $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
                            $stuff_to_parse);
                    }
                } else {
                    // Plain string form: the whole pattern becomes capture group 1
                    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                        // produce valid HTML when we match multiple lines
                        $this->_hmr_key = $key;
                        $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
                                              array($this, 'handle_multiline_regexps'), $stuff_to_parse);
                        $this->_hmr_key = '';
                    } else {
                        $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
                    }
                }
            }
        }

        // Highlight numbers. As of 1.0.8 we support different types of numbers
        $numbers_found = false;

        // PRECHECK_RX is tested once so the per-format regexps are skipped
        // when it does not match at all
        if ($this->lexic_permissions['NUMBERS'] && preg_match($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'], $stuff_to_parse )) {
            $numbers_found = true;

            //For each of the formats ...
            foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
                //Check if it should be highlighted ...
                $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
            }
        }

        //
        // Now that's all done, replace /[number]/ with the correct styles
        //
        foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
            if (!$this->use_classes) {
                $attributes = ' style="' .
                    (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
                    $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
            } else {
                $attributes = ' class="kw' . $k . '"';
            }
            $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
        }

        if ($numbers_found) {
            // Put number styles in
            foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
                //Commented out for now, as this needs some review ...
                //                if ($numbers_permissions & $id) {
                //Get the appropriate style ...
                //Checking for unset styles is done by the style cache builder ...
                if (!$this->use_classes) {
                    $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
                } else {
                    $attributes = ' class="nu'.$id.'"';
                }

                //Set in the correct styles ...
                $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
                //                }
            }
        }

        // Highlight methods and fields in objects
        if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
            // Defaults; each can be overridden through PARSER_CONTROL['OOLANG']
            $oolang_spaces = "[\s]*";
            $oolang_before = "";
            $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
            if (isset($this->language_data['PARSER_CONTROL'])) {
                if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
                        $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
                    }
                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
                        $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
                    }
                    if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
                        $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
                    }
                }
            }

            foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
                // Cheap substring test first, so the regexp only runs when the splitter occurs
                if (false !== strpos($stuff_to_parse, $splitter)) {
                    if (!$this->use_classes) {
                        $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
                    } else {
                        $attributes = ' class="me' . $key . '"';
                    }
                    // Only group 4 (the member name) is wrapped; splitter and spacing stay unstyled
                    $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
                }
            }
        }

        //
        // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
        // You try it, and see what happens ;)
        // TODO: Fix lexic permissions not converting entities if shouldn't
        // be highlighting regardless
        //
        if ($this->lexic_permissions['BRACKETS']) {
            $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
                              $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
        }


        //FIX for symbol highlighting ...
        if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
            //Get all matches and throw away those within a block that is already highlighted... (i.e. matched by a regexp)
            $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
            // Running difference between offsets in the original text (as
            // captured above) and offsets in the text after earlier replacements
            $global_offset = 0;
            for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
                $symbol_match = $pot_symbols[$s_id][0][0];
                if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
                    // already highlighted blocks _must_ include either < or >
                    // so if this conditional applies, we have to skip this match
                    // BenBE: UNLESS the block contains <SEMI> or <PIPE>
                    if(strpos($symbol_match, '<SEMI>') === false &&
                        strpos($symbol_match, '<PIPE>') === false) {
                        continue;
                    }
                }

                // if we reach this point, we have a valid match which needs to be highlighted

                $symbol_length = strlen($symbol_match);
                $symbol_offset = $pot_symbols[$s_id][0][1];
                unset($pot_symbols[$s_id]);
                $symbol_end = $symbol_length + $symbol_offset;
                $symbol_hl = "";

                // if we have multiple styles, we have to handle them properly
                if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
                    // -1 means that no symbol group placeholder is currently open
                    $old_sym = -1;
                    // Split the current stuff to replace into its atomic symbols ...
                    preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
                    foreach ($sym_match_syms[0] as $sym_ms) {
                        //Check if consecutive symbols belong to the same group to save output ...
                        if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
                            && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
                            if (-1 != $old_sym) {
                                $symbol_hl .= "|>";
                            }
                            $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
                            if (!$this->use_classes) {
                                $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
                            } else {
                                $symbol_hl .= '<| class="sy' . $old_sym . '">';
                            }
                        }
                        $symbol_hl .= $sym_ms;
                    }
                    unset($sym_match_syms);

                    //Close remaining tags and insert the replacement at the right position ...
                    //Take caution if symbol_hl is empty to avoid doubled closing spans.
                    if (-1 != $old_sym) {
                        $symbol_hl .= "|>";
                    }
                } else {
                    // Single symbol group: the whole run gets style/class 0
                    if (!$this->use_classes) {
                        $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
                    } else {
                        $symbol_hl = '<| class="sy0">';
                    }
                    $symbol_hl .= $symbol_match . '|>';
                }

                $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);

                // since we replace old text with something of different size,
                // we'll have to keep track of the differences
                $global_offset += strlen($symbol_hl) - $symbol_length;
            }
        }
        //FIX for symbol highlighting ...

        // Add class/style for regexps
        foreach (array_keys($this->language_data['REGEXPS']) as $key) {
            if ($this->lexic_permissions['REGEXPS'][$key]) {
                if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
                    // The style is computed per match by a callback, which
                    // finds its style entry through $this->_rx_key
                    $this->_rx_key = $key;
                    $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
                        array($this, 'handle_regexps_callback'),
                        $stuff_to_parse);
                } else {
                    if (!$this->use_classes) {
                        $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
                    } else {
                        // A regexp given in array form may name its own CSS class
                        if (is_array($this->language_data['REGEXPS'][$key]) &&
                            array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
                            $attributes = ' class="' .
                                $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
                        } else {
                           $attributes = ' class="re' . $key . '"';
                        }
                    }
                    $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
                }
            }
        }

        // Replace <DOT> with . for urls
        $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
        // Replace <|UR1| with <a href= for urls also
        if (isset($this->link_styles[GESHI_LINK])) {
            if ($this->use_classes) {
                $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
            } else {
                $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
            }
        } else {
            $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
        }

        //
        // NOW we add the span thingy ;)
        //

        $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
        $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
        // Drop the padding space that was prepended at the top
        return substr($stuff_to_parse, 1);
    }
  3671 
  3672     /**
  3673      * Sets the time taken to parse the code
  3674      *
  3675      * @param microtime The time when parsing started
  3676      * @param microtime The time when parsing ended
  3677      * @since 1.0.2
  3678      * @access private
  3679      */
  3680     function set_time($start_time, $end_time) {
  3681         $start = explode(' ', $start_time);
  3682         $end = explode(' ', $end_time);
  3683         $this->time = $end[0] + $end[1] - $start[0] - $start[1];
  3684     }
  3685 
  3686     /**
  3687      * Gets the time taken to parse the code
  3688      *
  3689      * @return double The time taken to parse the code
  3690      * @since  1.0.2
  3691      */
  3692     function get_time() {
  3693         return $this->time;
  3694     }
  3695 
  3696     /**
  3697      * Merges arrays recursively, overwriting values of the first array with values of later arrays
  3698      *
  3699      * @since 1.0.8
  3700      * @access private
  3701      */
  3702     function merge_arrays() {
  3703         $arrays = func_get_args();
  3704         $narrays = count($arrays);
  3705 
  3706         // check arguments
  3707         // comment out if more performance is necessary (in this case the foreach loop will trigger a warning if the argument is not an array)
  3708         for ($i = 0; $i < $narrays; $i ++) {
  3709             if (!is_array($arrays[$i])) {
  3710                 // also array_merge_recursive returns nothing in this case
  3711                 trigger_error('Argument #' . ($i+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
  3712                 return false;
  3713             }
  3714         }
  3715 
  3716         // the first array is in the output set in every case
  3717         $ret = $arrays[0];
  3718 
  3719         // merege $ret with the remaining arrays
  3720         for ($i = 1; $i < $narrays; $i ++) {
  3721             foreach ($arrays[$i] as $key => $value) {
  3722                 if (is_array($value) && isset($ret[$key])) {
  3723                     // if $ret[$key] is not an array you try to merge an scalar value with an array - the result is not defined (incompatible arrays)
  3724                     // in this case the call will trigger an E_USER_WARNING and the $ret[$key] will be false.
  3725                     $ret[$key] = $this->merge_arrays($ret[$key], $value);
  3726                 } else {
  3727                     $ret[$key] = $value;
  3728                 }
  3729             }
  3730         }
  3731 
  3732         return $ret;
  3733     }
  3734 
  3735     /**
  3736      * Gets language information and stores it for later use
  3737      *
  3738      * @param string The filename of the language file you want to load
  3739      * @since 1.0.0
  3740      * @access private
  3741      * @todo Needs to load keys for lexic permissions for keywords, regexps etc
  3742      */
  3743     function load_language($file_name) {
  3744         if ($file_name == $this->loaded_language) {
  3745             // this file is already loaded!
  3746             return;
  3747         }
  3748 
  3749         //Prepare some stuff before actually loading the language file
  3750         $this->loaded_language = $file_name;
  3751         $this->parse_cache_built = false;
  3752         $this->enable_highlighting();
  3753         $language_data = array();
  3754 
  3755         //Load the language file
  3756         require $file_name;
  3757 
  3758         // Perhaps some checking might be added here later to check that
  3759         // $language data is a valid thing but maybe not
  3760         $this->language_data = $language_data;
  3761 
  3762         // Set strict mode if should be set
  3763         $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];
  3764 
  3765         // Set permissions for all lexics to true
  3766         // so they'll be highlighted by default
  3767         foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
  3768             if (!empty($this->language_data['KEYWORDS'][$key])) {
  3769                 $this->lexic_permissions['KEYWORDS'][$key] = true;
  3770             } else {
  3771                 $this->lexic_permissions['KEYWORDS'][$key] = false;
  3772             }
  3773         }
  3774 
  3775         foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) {
  3776             $this->lexic_permissions['COMMENTS'][$key] = true;
  3777         }
  3778         foreach (array_keys($this->language_data['REGEXPS']) as $key) {
  3779             $this->lexic_permissions['REGEXPS'][$key] = true;
  3780         }
  3781 
  3782         // for BenBE and future code reviews:
  3783         // we can use empty here since we only check for existance and emptiness of an array
  3784         // if it is not an array at all but rather false or null this will work as intended as well
  3785         // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice
  3786         if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
  3787             foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
  3788                 // it's either true or false and maybe is true as well
  3789                 $perm = $value !== GESHI_NEVER;
  3790                 if ($flag == 'ALL') {
  3791                     $this->enable_highlighting($perm);
  3792                     continue;
  3793                 }
  3794                 if (!isset($this->lexic_permissions[$flag])) {
  3795                     // unknown lexic permission
  3796                     continue;
  3797                 }
  3798                 if (is_array($this->lexic_permissions[$flag])) {
  3799                     foreach ($this->lexic_permissions[$flag] as $key => $val) {
  3800                         $this->lexic_permissions[$flag][$key] = $perm;
  3801                     }
  3802                 } else {
  3803                     $this->lexic_permissions[$flag] = $perm;
  3804                 }
  3805             }
  3806             unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
  3807         }
  3808 
  3809         //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
  3810         //You need to set one for HARDESCAPES only in this case.
  3811         if(!isset($this->language_data['HARDCHAR'])) {
  3812             $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
  3813         }
  3814 
  3815         //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
  3816         $style_filename = substr($file_name, 0, -4) . '.style.php';
  3817         if (is_readable($style_filename)) {
  3818             //Clear any style_data that could have been set before ...
  3819             if (isset($style_data)) {
  3820                 unset($style_data);
  3821             }
  3822 
  3823             //Read the Style Information from the style file
  3824             include $style_filename;
  3825 
  3826             //Apply the new styles to our current language styles
  3827             if (isset($style_data) && is_array($style_data)) {
  3828                 $this->language_data['STYLES'] =
  3829                     $this->merge_arrays($this->language_data['STYLES'], $style_data);
  3830             }
  3831         }
  3832     }
  3833 
  3834     /**
  3835      * Takes the parsed code and various options, and creates the HTML
  3836      * surrounding it to make it look nice.
  3837      *
  3838      * @param  string The code already parsed (reference!)
  3839      * @since  1.0.0
  3840      * @access private
  3841      */
  3842     function finalise(&$parsed_code) {
  3843         // Remove end parts of important declarations
  3844         // This is BUGGY!! My fault for bad code: fix coming in 1.2
  3845         // @todo Remove this crap
  3846         if ($this->enable_important_blocks &&
  3847             (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
  3848             $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
  3849         }
  3850 
  3851         // Add HTML whitespace stuff if we're using the <div> header
  3852         if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
  3853             $this->indent($parsed_code);
  3854         }
  3855 
  3856         // purge some unnecessary stuff
  3857         /** NOTE: memorypeak #1 */
  3858         $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);
  3859 
  3860         // If we are using IDs for line numbers, there needs to be an overall
  3861         // ID set to prevent collisions.
  3862         if ($this->add_ids && !$this->overall_id) {
  3863             $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
  3864         }
  3865 
  3866         // Get code into lines
  3867         /** NOTE: memorypeak #2 */
  3868         $code = explode("\n", $parsed_code);
  3869         $parsed_code = $this->header();
  3870 
  3871         // If we're using line numbers, we insert <li>s and appropriate
  3872         // markup to style them (otherwise we don't need to do anything)
  3873         if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
  3874             // If we're using the <pre> header, we shouldn't add newlines because
  3875             // the <pre> will line-break them (and the <li>s already do this for us)
  3876             $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';
  3877 
  3878             // Set vars to defaults for following loop
  3879             $i = 0;
  3880 
  3881             // Foreach line...
  3882             for ($i = 0, $n = count($code); $i < $n;) {
  3883                 //Reset the attributes for a new line ...
  3884                 $attrs = array();
  3885 
  3886                 // Make lines have at least one space in them if they're empty
  3887                 // BenBE: Checking emptiness using trim instead of relying on blanks
  3888                 if ('' == trim($code[$i])) {
  3889                     $code[$i] = '&nbsp;';
  3890                 }
  3891 
  3892                 // If this is a "special line"...
  3893                 if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
  3894                     $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
  3895                     // Set the attributes to style the line
  3896                     if ($this->use_classes) {
  3897                         //$attr = ' class="li2"';
  3898                         $attrs['class'][] = 'li2';
  3899                         $def_attr = ' class="de2"';
  3900                     } else {
  3901                         //$attr = ' style="' . $this->line_style2 . '"';
  3902                         $attrs['style'][] = $this->line_style2;
  3903                         // This style "covers up" the special styles set for special lines
  3904                         // so that styles applied to special lines don't apply to the actual
  3905                         // code on that line
  3906                         $def_attr = ' style="' . $this->code_style . '"';
  3907                     }
  3908                 } else {
  3909                     if ($this->use_classes) {
  3910                         //$attr = ' class="li1"';
  3911                         $attrs['class'][] = 'li1';
  3912                         $def_attr = ' class="de1"';
  3913                     } else {
  3914                         //$attr = ' style="' . $this->line_style1 . '"';
  3915                         $attrs['style'][] = $this->line_style1;
  3916                         $def_attr = ' style="' . $this->code_style . '"';
  3917                     }
  3918                 }
  3919 
  3920                 //Check which type of tag to insert for this line
  3921                 if ($this->header_type == GESHI_HEADER_PRE_VALID) {
  3922                     $start = "<pre$def_attr>";
  3923                     $end = '</pre>';
  3924                 } else {
  3925                     // Span or div?
  3926                     $start = "<div$def_attr>";
  3927                     $end = '</div>';
  3928                 }
  3929 
  3930                 ++$i;
  3931 
  3932                 // Are we supposed to use ids? If so, add them
  3933                 if ($this->add_ids) {
  3934                     $attrs['id'][] = "$this->overall_id-$i";
  3935                 }
  3936 
  3937                 //Is this some line with extra styles???
  3938                 if (in_array($i, $this->highlight_extra_lines)) {
  3939                     if ($this->use_classes) {
  3940                         if (isset($this->highlight_extra_lines_styles[$i])) {
  3941                             $attrs['class'][] = "lx$i";
  3942                         } else {
  3943                             $attrs['class'][] = "ln-xtra";
  3944                         }
  3945                     } else {
  3946                         array_push($attrs['style'], $this->get_line_style($i));
  3947                     }
  3948                 }
  3949 
  3950                 // Add in the line surrounded by appropriate list HTML
  3951                 $attr_string = '';
  3952                 foreach ($attrs as $key => $attr) {
  3953                     $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
  3954                 }
  3955 
  3956                 $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
  3957                 unset($code[$i - 1]);
  3958             }
  3959         } else {
  3960             $n = count($code);
  3961             if ($this->use_classes) {
  3962                 $attributes = ' class="de1"';
  3963             } else {
  3964                 $attributes = ' style="'. $this->code_style .'"';
  3965             }
  3966             if ($this->header_type == GESHI_HEADER_PRE_VALID) {
  3967                 $parsed_code .= '<pre'. $attributes .'>';
  3968             } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
  3969                 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
  3970                     if ($this->use_classes) {
  3971                         $attrs = ' class="ln"';
  3972                     } else {
  3973                         $attrs = ' style="'. $this->table_linenumber_style .'"';
  3974                     }
  3975                     $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
  3976                     // get linenumbers
  3977                     // we don't merge it with the for below, since it should be better for
  3978                     // memory consumption this way
  3979                     // @todo: but... actually it would still be somewhat nice to merge the two loops
  3980                     //        the mem peaks are at different positions
  3981                     for ($i = 0; $i < $n; ++$i) {
  3982                         $close = 0;
  3983                         // fancy lines
  3984                         if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
  3985                             $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
  3986                             // Set the attributes to style the line
  3987                             if ($this->use_classes) {
  3988                                 $parsed_code .= '<span class="xtra li2"><span class="de2">';
  3989                             } else {
  3990                                 // This style "covers up" the special styles set for special lines
  3991                                 // so that styles applied to special lines don't apply to the actual
  3992                                 // code on that line
  3993                                 $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
  3994                                                   .'<span style="' . $this->code_style .'">';
  3995                             }
  3996                             $close += 2;
  3997                         }
  3998                         //Is this some line with extra styles???
  3999                         if (in_array($i + 1, $this->highlight_extra_lines)) {
  4000                             if ($this->use_classes) {
  4001                                 if (isset($this->highlight_extra_lines_styles[$i])) {
  4002                                     $parsed_code .= "<span class=\"xtra lx$i\">";
  4003                                 } else {
  4004                                     $parsed_code .= "<span class=\"xtra ln-xtra\">";
  4005                                 }
  4006                             } else {
  4007                                 $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
  4008                             }
  4009                             ++$close;
  4010                         }
  4011                         $parsed_code .= $this->line_numbers_start + $i;
  4012                         if ($close) {
  4013                             $parsed_code .= str_repeat('</span>', $close);
  4014                         } elseif ($i != $n) {
  4015                             $parsed_code .= "\n";
  4016                         }
  4017                     }
  4018                     $parsed_code .= '</pre></td><td'.$attributes.'>';
  4019                 }
  4020                 $parsed_code .= '<pre'. $attributes .'>';
  4021             }
  4022             // No line numbers, but still need to handle highlighting lines extra.
  4023             // Have to use divs so the full width of the code is highlighted
  4024             $close = 0;
  4025             for ($i = 0; $i < $n; ++$i) {
  4026                 // Make lines have at least one space in them if they're empty
  4027                 // BenBE: Checking emptiness using trim instead of relying on blanks
  4028                 if ('' == trim($code[$i])) {
  4029                     $code[$i] = '&nbsp;';
  4030                 }
  4031                 // fancy lines
  4032                 if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
  4033                     $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
  4034                     // Set the attributes to style the line
  4035                     if ($this->use_classes) {
  4036                         $parsed_code .= '<span class="xtra li2"><span class="de2">';
  4037                     } else {
  4038                         // This style "covers up" the special styles set for special lines
  4039                         // so that styles applied to special lines don't apply to the actual
  4040                         // code on that line
  4041                         $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
  4042                                           .'<span style="' . $this->code_style .'">';
  4043                     }
  4044                     $close += 2;
  4045                 }
  4046                 //Is this some line with extra styles???
  4047                 if (in_array($i + 1, $this->highlight_extra_lines)) {
  4048                     if ($this->use_classes) {
  4049                         if (isset($this->highlight_extra_lines_styles[$i])) {
  4050                             $parsed_code .= "<span class=\"xtra lx$i\">";
  4051                         } else {
  4052                             $parsed_code .= "<span class=\"xtra ln-xtra\">";
  4053                         }
  4054                     } else {
  4055                         $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
  4056                     }
  4057                     ++$close;
  4058                 }
  4059 
  4060                 $parsed_code .= $code[$i];
  4061 
  4062                 if ($close) {
  4063                   $parsed_code .= str_repeat('</span>', $close);
  4064                   $close = 0;
  4065                 }
  4066                 elseif ($i + 1 < $n) {
  4067                     $parsed_code .= "\n";
  4068                 }
  4069                 unset($code[$i]);
  4070             }
  4071 
  4072             if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
  4073                 $parsed_code .= '</pre>';
  4074             }
  4075             if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
  4076                 $parsed_code .= '</td>';
  4077             }
  4078         }
  4079 
  4080         $parsed_code .= $this->footer();
  4081     }
  4082 
  /**
   * Builds the opening markup of the highlighted code block.
   *
   * The outer element always receives a class attribute made of the language
   * name plus, if set, the overall class. The overall ID is appended when it
   * is non-empty, and the overall style is appended when it is non-empty and
   * CSS classes are not in use. Configured header content has its keywords
   * expanded and is wrapped in its own element (a table head cell for the
   * table layout with line numbers, a div otherwise).
   *
   * @return string The header for the code block
   * @since  1.0.0
   * @access private
   */
  function header() {
      $type         = $this->header_type;
      $with_numbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS;

      /**
       * @todo   Document behaviour change - class is outputted regardless of whether
       *         we're using classes or not. Same with style
       */
      $css_names = $this->_genCSSName($this->language);
      if ($this->overall_class != '') {
          $css_names .= ' ' . $this->_genCSSName($this->overall_class);
      }
      $outer_attrs = ' class="' . $css_names . '"';

      if ($this->overall_id != '') {
          $outer_attrs .= ' id="' . $this->overall_id . '"';
      }
      if (!$this->use_classes && $this->overall_style != '') {
          $outer_attrs .= " style=\"{$this->overall_style}\"";
      }

      // The list only needs a start attribute when numbering does not begin at 1
      $list_attrs = ($this->line_numbers_start != 1)
          ? " start=\"{$this->line_numbers_start}\""
          : '';

      // Prepare the user supplied header content, if there is any
      $head = $this->header_content;
      if ($head) {
          // Newlines are dropped for the <pre> based header types
          // (presumably because a <pre> would render them as line breaks)
          if ($type == GESHI_HEADER_PRE || $type == GESHI_HEADER_PRE_VALID) {
              $head = str_replace("\n", '', $head);
          }
          $head = $this->replace_keywords($head);

          $head_attr = $this->use_classes
              ? ' class="head"'
              : ' style="' . $this->header_content_style . '"';

          if ($with_numbers && $type == GESHI_HEADER_PRE_TABLE) {
              // Table layout: the header spans the line number and code columns
              $head = '<thead><tr><td colspan="2" ' . $head_attr . '>' . $head . '</td></tr></thead>';
          } else {
              $head = '<div' . $head_attr . '>' . $head . '</div>';
          }
      }

      // No wrapper element requested: the <ol> (if any) carries the attributes
      if (GESHI_HEADER_NONE == $type) {
          return $with_numbers
              ? $head . '<ol' . $outer_attrs . $list_attrs . '>'
              : $head . ($this->force_code_block ? '<div>' : '');
      }

      // Without line numbers only <pre> and <div> wrappers are produced
      if (!$with_numbers) {
          $tag = ($type == GESHI_HEADER_PRE) ? 'pre' : 'div';
          return '<' . $tag . $outer_attrs . '>' . $head .
              ($this->force_code_block ? '<div>' : '');
      }

      // With line numbers the wrapper depends on the header type
      if ($type == GESHI_HEADER_PRE) {
          return '<pre' . $outer_attrs . '>' . $head . '<ol' . $list_attrs . '>';
      }
      if ($type == GESHI_HEADER_DIV || $type == GESHI_HEADER_PRE_VALID) {
          return '<div' . $outer_attrs . '>' . $head . '<ol' . $list_attrs . '>';
      }
      if ($type == GESHI_HEADER_PRE_TABLE) {
          return '<table' . $outer_attrs . '>' . $head . '<tbody><tr class="li1">';
      }

      // Any other header type combined with line numbers yields no markup
      return null;
  }
  4162 
  /**
   * Returns the footer for the code block.
   *
   * Configured footer content has its keywords expanded and is wrapped in its
   * own element, which carries either the "foot" class (when CSS classes are
   * in use) or the inline footer content style. The closing tags appended
   * afterwards depend on the header type and on whether line numbers are used.
   *
   * @return string The footer for the code block
   * @since  1.0.0
   * @access private
   */
  function footer() {
      $footer = $this->footer_content;
      if ($footer) {
          if ($this->header_type == GESHI_HEADER_PRE) {
              $footer = str_replace("\n", '', $footer);
          }
          $footer = $this->replace_keywords($footer);

          if ($this->use_classes) {
              $attr = ' class="foot"';
          } else {
              $attr = " style=\"{$this->footer_content_style}\"";
          }
          if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
              // The cell must carry the footer class/style, just as header()
              // does for the <thead> cell; otherwise the footer styling is
              // silently dropped in the table layout.
              $footer = "<tfoot><tr><td colspan=\"2\"$attr>$footer</td></tr></tfoot>";
          } else {
              $footer = "<div$attr>$footer</div>";
          }
      }

      // No wrapper element: only an open <ol> (if any) has to be closed
      if (GESHI_HEADER_NONE == $this->header_type) {
          return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
      }

      if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
          if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
              return "</ol>$footer</div>";
          }
          return ($this->force_code_block ? '</div>' : '') .
              "$footer</div>";
      }
      elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
          if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
              return "</tr></tbody>$footer</table>";
          }
          // Without line numbers the table layout is closed with a </div>
          return ($this->force_code_block ? '</div>' : '') .
              "$footer</div>";
      }
      else {
          // Remaining header types are closed with </pre>
          if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
              return "</ol>$footer</pre>";
          }
          return ($this->force_code_block ? '</div>' : '') .
              "$footer</pre>";
      }
  }
  4216 
  /**
   * Expands the placeholder keywords that may appear in header and footer
   * content. Each keyword is recognised in two spellings, <NAME> and {NAME}:
   *
   *  - TIME:     the value of get_time(), formatted with three decimals
   *  - LANGUAGE: the LANG_NAME entry of the current language data
   *  - VERSION:  the GESHI_VERSION constant
   *  - SPEED:    source length divided by the time, as "B/s" or "KB/s",
   *              or "N/A" when the time is not positive
   *
   * @param  string $instr The header or footer content to do replacement on
   * @return string The header or footer with replaced keywords
   * @since  1.0.2
   * @access private
   */
  function replace_keywords($instr) {
      $elapsed = $this->get_time();

      // Collect one value per keyword, in the order they are substituted
      $values = array();
      $values['TIME']     = number_format($elapsed, 3);
      $values['LANGUAGE'] = $this->language_data['LANG_NAME'];
      $values['VERSION']  = GESHI_VERSION;

      if ($elapsed <= 0) {
          // No usable timing, so no rate can be computed
          $values['SPEED'] = 'N/A';
      } else {
          $bytes_per_second = strlen($this->source) / $elapsed;
          $values['SPEED'] = ($bytes_per_second >= 1024)
              ? sprintf("%.2f KB/s", $bytes_per_second / 1024.0)
              : sprintf("%.0f B/s", $bytes_per_second);
      }

      // Expand every keyword into its <NAME> and {NAME} spellings
      $search  = array();
      $replace = array();
      foreach ($values as $token => $value) {
          $search[]  = '<' . $token . '>';
          $replace[] = $value;
          $search[]  = '{' . $token . '}';
          $replace[] = $value;
      }

      return str_replace($search, $replace, $instr);
  }
  4257 
  4258     /**
  4259      * Secure replacement for PHP built-in function htmlspecialchars().
  4260      *
  4261      * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
  4262      * for this replacement function.
  4263      *
  4264      * The INTERFACE for this function is almost the same as that for
  4265      * htmlspecialchars(), with the same default for quote style; however, there
  4266      * is no 'charset' parameter. The reason for this is as follows:
  4267      *
  4268      * The PHP docs say:
  4269      *      "The third argument charset defines character set used in conversion."
  4270      *
  4271      * I suspect PHP's htmlspecialchars() is working at the byte-value level and
  4272      * thus _needs_ to know (or assume) a character set because the special
  4273      * characters to be replaced could exist at different code points in
  4274      * different character sets. (If indeed htmlspecialchars() works at
  4275      * byte-value level that goes some  way towards explaining why the
  4276      * vulnerability would exist in this function, too, and not only in
  4277      * htmlentities() which certainly is working at byte-value level.)
  4278      *
  4279      * This replacement function however works at character level and should
  4280      * therefore be "immune" to character set differences - so no charset
  4281      * parameter is needed or provided. If a third parameter is passed, it will
  4282      * be silently ignored.
  4283      *
  4284      * In the OUTPUT there is a minor difference in that we use '&#39;' instead
  4285      * of PHP's '&#039;' for a single quote: this provides compatibility with
  4286      *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
  4287      * (see comment by mikiwoz at yahoo dot co dot uk on
  4288      * http://php.net/htmlspecialchars); it also matches the entity definition
  4289      * for XML 1.0
  4290      * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
  4291      * Like PHP we use a numeric character reference instead of '&apos;' for the
  4292      * single quote. For the other special characters we use the named entity
  4293      * references, as PHP is doing.
  4294      *
  4295      * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
  4296      *
  4297      * @license     http://www.gnu.org/copyleft/lgpl.html
  4298      *              GNU Lesser General Public License
  4299      * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
  4300      *              Wikka Development Team}
  4301      *
  4302      * @access      private
  4303      * @param       string  $string string to be converted
  4304      * @param       integer $quote_style
  4305      *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
  4306      *                      - ENT_NOQUOTES: escapes only &, < and >
  4307      *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
  4308      * @return      string  converted string
  4309      * @since       1.0.7.18
  4310      */
  function hsc($string, $quote_style = ENT_COMPAT) {
      // Base translation table (the ENT_COMPAT set). It is static so it is
      // only built once, and therefore must never be modified in place:
      // a change would leak into every later call.
      static $aTransSpecchar = array(
          '&' => '&amp;',
          '"' => '&quot;',
          '<' => '&lt;',
          '>' => '&gt;',

          //This fix is related to SF#1923020, but has to be applied
          //regardless of actually highlighting symbols.

          //Circumvent a bug with symbol highlighting
          //This is required as ; would produce undesirable side-effects if it
          //was not to be processed as an entity.
          ';' => '<SEMI>', // Force ; to be processed as entity
          '|' => '<PIPE>' // Force | to be processed as entity
          );                      // ENT_COMPAT set

      // Adjust a per-call copy so the requested quote style only affects
      // this invocation
      $aTrans = $aTransSpecchar;
      switch ($quote_style) {
          case ENT_NOQUOTES: // don't convert double quotes
              unset($aTrans['"']);
              break;
          case ENT_QUOTES: // convert single quotes as well
              $aTrans["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
              break;
      }

      // return translated string
      return strtr($string, $aTrans);
  }
  4341 
  /**
   * Prefixes a name with an underscore when its first character is numeric,
   * so it can be used as a CSS class or ID name.
   *
   * @param  string $name The name to convert
   * @return string The name, prefixed with "_" if it starts with a digit
   * @access private
   */
  function _genCSSName($name){
      // An empty name has no first character; reading offset 0 would raise
      // an "Uninitialized string offset" notice/warning.
      if ($name === '' || $name === null) {
          return '';
      }
      return (is_numeric($name[0]) ? '_' : '') . $name;
  }
  4345 
  4346     /**
  4347      * Returns a stylesheet for the highlighted code. If $economy mode
  4348      * is true, we only return the stylesheet declarations that matter for
  4349      * this code block instead of the whole thing
  4350      *
  4351      * @param  boolean Whether to use economy mode or not
  4352      * @return string A stylesheet built on the data for the current language
  4353      * @since  1.0.0
  4354      */
  4355     function get_stylesheet($economy_mode = true) {
  4356         // If there's an error, chances are that the language file
  4357         // won't have populated the language data file, so we can't
  4358         // risk getting a stylesheet...
  4359         if ($this->error) {
  4360             return '';
  4361         }
  4362 
  4363         //Check if the style rearrangements have been processed ...
  4364         //This also does some preprocessing to check which style groups are useable ...
  4365         if(!isset($this->language_data['NUMBERS_CACHE'])) {
  4366             $this->build_style_cache();
  4367         }
  4368 
  4369         // First, work out what the selector should be. If there's an ID,
  4370         // that should be used, the same for a class. Otherwise, a selector
  4371         // of '' means that these styles will be applied anywhere
  4372         if ($this->overall_id) {
  4373             $selector = '#' . $this->_genCSSName($this->overall_id);
  4374         } else {
  4375             $selector = '.' . $this->_genCSSName($this->language);
  4376             if ($this->overall_class) {
  4377                 $selector .= '.' . $this->_genCSSName($this->overall_class);
  4378             }
  4379         }
  4380         $selector .= ' ';
  4381 
  4382         // Header of the stylesheet
  4383         if (!$economy_mode) {
  4384             $stylesheet = "/**\n".
  4385                 " * GeSHi Dynamically Generated Stylesheet\n".
  4386                 " * --------------------------------------\n".
  4387                 " * Dynamically generated stylesheet for {$this->language}\n".
  4388                 " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n".
  4389                 " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
  4390                 " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
  4391                 " * --------------------------------------\n".
  4392                 " */\n";
  4393         } else {
  4394             $stylesheet = "/**\n".
  4395                 " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
  4396                 " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
  4397                 " */\n";
  4398         }
  4399 
  4400         // Set the <ol> to have no effect at all if there are line numbers
  4401         // (<ol>s have margins that should be destroyed so all layout is
  4402         // controlled by the set_overall_style method, which works on the
  4403         // <pre> or <div> container). Additionally, set default styles for lines
  4404         if (!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
  4405             //$stylesheet .= "$selector, {$selector}ol, {$selector}ol li {margin: 0;}\n";
  4406             $stylesheet .= "$selector.de1, $selector.de2 {{$this->code_style}}\n";
  4407         }
  4408 
  4409         // Add overall styles
  4410         // note: neglect economy_mode, empty styles are meaningless
  4411         if ($this->overall_style != '') {
  4412             $stylesheet .= "$selector {{$this->overall_style}}\n";
  4413         }
  4414 
  4415         // Add styles for links
  4416         // note: economy mode does not make _any_ sense here
  4417         //       either the style is empty and thus no selector is needed
  4418         //       or the appropriate key is given.
  4419         foreach ($this->link_styles as $key => $style) {
  4420             if ($style != '') {
  4421                 switch ($key) {
  4422                     case GESHI_LINK:
  4423                         $stylesheet .= "{$selector}a:link {{$style}}\n";
  4424                         break;
  4425                     case GESHI_HOVER:
  4426                         $stylesheet .= "{$selector}a:hover {{$style}}\n";
  4427                         break;
  4428                     case GESHI_ACTIVE:
  4429                         $stylesheet .= "{$selector}a:active {{$style}}\n";
  4430                         break;
  4431                     case GESHI_VISITED:
  4432                         $stylesheet .= "{$selector}a:visited {{$style}}\n";
  4433                         break;
  4434                 }
  4435             }
  4436         }
  4437 
  4438         // Header and footer
  4439         // note: neglect economy_mode, empty styles are meaningless
  4440         if ($this->header_content_style != '') {
  4441             $stylesheet .= "$selector.head {{$this->header_content_style}}\n";
  4442         }
  4443         if ($this->footer_content_style != '') {
  4444             $stylesheet .= "$selector.foot {{$this->footer_content_style}}\n";
  4445         }
  4446 
  4447         // Styles for important stuff
  4448         // note: neglect economy_mode, empty styles are meaningless
  4449         if ($this->important_styles != '') {
  4450             $stylesheet .= "$selector.imp {{$this->important_styles}}\n";
  4451         }
  4452 
  4453         // Simple line number styles
  4454         if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->line_style1 != '') {
  4455             $stylesheet .= "{$selector}li, {$selector}.li1 {{$this->line_style1}}\n";
  4456         }
  4457         if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->table_linenumber_style != '') {
  4458             $stylesheet .= "{$selector}.ln {{$this->table_linenumber_style}}\n";
  4459         }
  4460         // If there is a style set for fancy line numbers, echo it out
  4461         if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
  4462             $stylesheet .= "{$selector}.li2 {{$this->line_style2}}\n";
  4463         }
  4464 
  4465         // note: empty styles are meaningless
  4466         foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
  4467             if ($styles != '' && (!$economy_mode ||
  4468                 (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
  4469                 $this->lexic_permissions['KEYWORDS'][$group]))) {
  4470                 $stylesheet .= "$selector.kw$group {{$styles}}\n";
  4471             }
  4472         }
  4473         foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
  4474             if ($styles != '' && (!$economy_mode ||
  4475                 (isset($this->lexic_permissions['COMMENTS'][$group]) &&
  4476                 $this->lexic_permissions['COMMENTS'][$group]) ||
  4477                 (!empty($this->language_data['COMMENT_REGEXP']) &&
  4478                 !empty($this->language_data['COMMENT_REGEXP'][$group])))) {
  4479                 $stylesheet .= "$selector.co$group {{$styles}}\n";
  4480             }
  4481         }
  4482         foreach ($this->language_data['STYLES']['ESCAPE_CHAR'] as $group => $styles) {
  4483             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['ESCAPE_CHAR'])) {
  4484                 // NEW: since 1.0.8 we have to handle hardescapes
  4485                 if ($group === 'HARD') {
  4486                     $group = '_h';
  4487                 }
  4488                 $stylesheet .= "$selector.es$group {{$styles}}\n";
  4489             }
  4490         }
  4491         foreach ($this->language_data['STYLES']['BRACKETS'] as $group => $styles) {
  4492             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['BRACKETS'])) {
  4493                 $stylesheet .= "$selector.br$group {{$styles}}\n";
  4494             }
  4495         }
  4496         foreach ($this->language_data['STYLES']['SYMBOLS'] as $group => $styles) {
  4497             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['SYMBOLS'])) {
  4498                 $stylesheet .= "$selector.sy$group {{$styles}}\n";
  4499             }
  4500         }
  4501         foreach ($this->language_data['STYLES']['STRINGS'] as $group => $styles) {
  4502             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['STRINGS'])) {
  4503                 // NEW: since 1.0.8 we have to handle hardquotes
  4504                 if ($group === 'HARD') {
  4505                     $group = '_h';
  4506                 }
  4507                 $stylesheet .= "$selector.st$group {{$styles}}\n";
  4508             }
  4509         }
  4510         foreach ($this->language_data['STYLES']['NUMBERS'] as $group => $styles) {
  4511             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['NUMBERS'])) {
  4512                 $stylesheet .= "$selector.nu$group {{$styles}}\n";
  4513             }
  4514         }
  4515         foreach ($this->language_data['STYLES']['METHODS'] as $group => $styles) {
  4516             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['METHODS'])) {
  4517                 $stylesheet .= "$selector.me$group {{$styles}}\n";
  4518             }
  4519         }
  4520         // note: neglect economy_mode, empty styles are meaningless
  4521         foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
  4522             if ($styles != '') {
  4523                 $stylesheet .= "$selector.sc$group {{$styles}}\n";
  4524             }
  4525         }
  4526         foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
  4527             if ($styles != '' && (!$economy_mode ||
  4528                 (isset($this->lexic_permissions['REGEXPS'][$group]) &&
  4529                 $this->lexic_permissions['REGEXPS'][$group]))) {
  4530                 if (is_array($this->language_data['REGEXPS'][$group]) &&
  4531                     array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$group])) {
  4532                     $stylesheet .= "$selector.";
  4533                     $stylesheet .= $this->language_data['REGEXPS'][$group][GESHI_CLASS];
  4534                     $stylesheet .= " {{$styles}}\n";
  4535                 } else {
  4536                     $stylesheet .= "$selector.re$group {{$styles}}\n";
  4537                 }
  4538             }
  4539         }
  4540         // Styles for lines being highlighted extra
  4541         if (!$economy_mode || (count($this->highlight_extra_lines)!=count($this->highlight_extra_lines_styles))) {
  4542             $stylesheet .= "{$selector}.ln-xtra, {$selector}li.ln-xtra, {$selector}div.ln-xtra {{$this->highlight_extra_lines_style}}\n";
  4543         }
  4544         $stylesheet .= "{$selector}span.xtra { display:block; }\n";
  4545         foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
  4546             $stylesheet .= "{$selector}.lx$lineid, {$selector}li.lx$lineid, {$selector}div.lx$lineid {{$linestyle}}\n";
  4547         }
  4548 
  4549         return $stylesheet;
  4550     }
  4551 
  4552     /**
  4553      * Get's the style that is used for the specified line
  4554      *
  4555      * @param int The line number information is requested for
  4556      * @access private
  4557      * @since 1.0.7.21
  4558      */
  4559     function get_line_style($line) {
  4560         //$style = null;
  4561         $style = null;
  4562         if (isset($this->highlight_extra_lines_styles[$line])) {
  4563             $style = $this->highlight_extra_lines_styles[$line];
  4564         } else { // if no "extra" style assigned
  4565             $style = $this->highlight_extra_lines_style;
  4566         }
  4567 
  4568         return $style;
  4569     }
  4570 
  4571     /**
  4572     * this functions creates an optimized regular expression list
  4573     * of an array of strings.
  4574     *
  4575     * Example:
  4576     * <code>$list = array('faa', 'foo', 'foobar');
  4577     *          => string 'f(aa|oo(bar)?)'</code>
  4578     *
  4579     * @param $list array of (unquoted) strings
  4580     * @param $regexp_delimiter your regular expression delimiter, @see preg_quote()
  4581     * @return string for regular expression
  4582     * @author Milian Wolff <mail@milianw.de>
  4583     * @since 1.0.8
  4584     * @access private
  4585     */
  4586     function optimize_regexp_list($list, $regexp_delimiter = '/') {
  4587         $regex_chars = array('.', '\\', '+', '-', '*', '?', '[', '^', ']', '$',
  4588             '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
  4589         sort($list);
  4590         $regexp_list = array('');
  4591         $num_subpatterns = 0;
  4592         $list_key = 0;
  4593 
  4594         // the tokens which we will use to generate the regexp list
  4595         $tokens = array();
  4596         $prev_keys = array();
  4597         // go through all entries of the list and generate the token list
  4598         $cur_len = 0;
  4599         for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
  4600             if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
  4601                 // seems like the length of this pcre is growing exorbitantly
  4602                 $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
  4603                 $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
  4604                 $tokens = array();
  4605                 $cur_len = 0;
  4606             }
  4607             $level = 0;
  4608             $entry = preg_quote((string) $list[$i], $regexp_delimiter);
  4609             $pointer = &$tokens;
  4610             // properly assign the new entry to the correct position in the token array
  4611             // possibly generate smaller common denominator keys
  4612             while (true) {
  4613                 // get the common denominator
  4614                 if (isset($prev_keys[$level])) {
  4615                     if ($prev_keys[$level] == $entry) {
  4616                         // this is a duplicate entry, skip it
  4617                         continue 2;
  4618                     }
  4619                     $char = 0;
  4620                     while (isset($entry[$char]) && isset($prev_keys[$level][$char])
  4621                             && $entry[$char] == $prev_keys[$level][$char]) {
  4622                         ++$char;
  4623                     }
  4624                     if ($char > 0) {
  4625                         // this entry has at least some chars in common with the current key
  4626                         if ($char == strlen($prev_keys[$level])) {
  4627                             // current key is totally matched, i.e. this entry has just some bits appended
  4628                             $pointer = &$pointer[$prev_keys[$level]];
  4629                         } else {
  4630                             // only part of the keys match
  4631                             $new_key_part1 = substr($prev_keys[$level], 0, $char);
  4632                             $new_key_part2 = substr($prev_keys[$level], $char);
  4633 
  4634                             if (in_array($new_key_part1[0], $regex_chars)
  4635                                 || in_array($new_key_part2[0], $regex_chars)) {
  4636                                 // this is bad, a regex char as first character
  4637                                 $pointer[$entry] = array('' => true);
  4638                                 array_splice($prev_keys, $level, count($prev_keys), $entry);
  4639                                 $cur_len += strlen($entry);
  4640                                 continue;
  4641                             } else {
  4642                                 // relocate previous tokens
  4643                                 $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
  4644                                 unset($pointer[$prev_keys[$level]]);
  4645                                 $pointer = &$pointer[$new_key_part1];
  4646                                 // recreate key index
  4647                                 array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
  4648                                 $cur_len += strlen($new_key_part2);
  4649                             }
  4650                         }
  4651                         ++$level;
  4652                         $entry = substr($entry, $char);
  4653                         continue;
  4654                     }
  4655                     // else: fall trough, i.e. no common denominator was found
  4656                 }
  4657                 if ($level == 0 && !empty($tokens)) {
  4658                     // we can dump current tokens into the string and throw them away afterwards
  4659                     $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
  4660                     $new_subpatterns = substr_count($new_entry, '(?:');
  4661                     if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
  4662                         $regexp_list[++$list_key] = $new_entry;
  4663                         $num_subpatterns = $new_subpatterns;
  4664                     } else {
  4665                         if (!empty($regexp_list[$list_key])) {
  4666                             $new_entry = '|' . $new_entry;
  4667                         }
  4668                         $regexp_list[$list_key] .= $new_entry;
  4669                         $num_subpatterns += $new_subpatterns;
  4670                     }
  4671                     $tokens = array();
  4672                     $cur_len = 0;
  4673                 }
  4674                 // no further common denominator found
  4675                 $pointer[$entry] = array('' => true);
  4676                 array_splice($prev_keys, $level, count($prev_keys), $entry);
  4677 
  4678                 $cur_len += strlen($entry);
  4679                 break;
  4680             }
  4681             unset($list[$i]);
  4682         }
  4683         // make sure the last tokens get converted as well
  4684         $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
  4685         if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
  4686             if ( !empty($regexp_list[$list_key]) ) {
  4687               ++$list_key;
  4688             }
  4689             $regexp_list[$list_key] = $new_entry;
  4690         } else {
  4691             if (!empty($regexp_list[$list_key])) {
  4692                 $new_entry = '|' . $new_entry;
  4693             }
  4694             $regexp_list[$list_key] .= $new_entry;
  4695         }
  4696         return $regexp_list;
  4697     }
  4698     /**
  4699     * this function creates the appropriate regexp string of an token array
  4700     * you should not call this function directly, @see $this->optimize_regexp_list().
  4701     *
  4702     * @param &$tokens array of tokens
  4703     * @param $recursed bool to know wether we recursed or not
  4704     * @return string
  4705     * @author Milian Wolff <mail@milianw.de>
  4706     * @since 1.0.8
  4707     * @access private
  4708     */
  4709     function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
  4710         $list = '';
  4711         foreach ($tokens as $token => $sub_tokens) {
  4712             $list .= $token;
  4713             $close_entry = isset($sub_tokens['']);
  4714             unset($sub_tokens['']);
  4715             if (!empty($sub_tokens)) {
  4716                 $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
  4717                 if ($close_entry) {
  4718                     // make sub_tokens optional
  4719                     $list .= '?';
  4720                 }
  4721             }
  4722             $list .= '|';
  4723         }
  4724         if (!$recursed) {
  4725             // do some optimizations
  4726             // common trailing strings
  4727             // BUGGY!
  4728             //$list = preg_replace_callback('#(?<=^|\:|\|)\w+?(\w+)(?:\|.+\1)+(?=\|)#', create_function(
  4729             //    '$matches', 'return "(?:" . preg_replace("#" . preg_quote($matches[1], "#") . "(?=\||$)#", "", $matches[0]) . ")" . $matches[1];'), $list);
  4730             // (?:p)? => p?
  4731             $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
  4732             // (?:a|b|c|d|...)? => [abcd...]?
  4733             // TODO: a|bb|c => [ac]|bb
  4734             static $callback_2;
  4735             if (!isset($callback_2)) {
  4736                 $callback_2 = function( $matches ) {
  4737                     return "[" . str_replace("|", "", $matches[1]) . "]";
  4738                 };
  4739             }
  4740             $list = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#', $callback_2, $list);
  4741         }
  4742         // return $list without trailing pipe
  4743         return substr($list, 0, -1);
  4744     }
  4745 } // End Class GeSHi
  4746 
  4747 
  4748 if (!function_exists('geshi_highlight')) {
  4749     /**
  4750      * Easy way to highlight stuff. Behaves just like highlight_string
  4751      *
  4752      * @param string The code to highlight
  4753      * @param string The language to highlight the code in
  4754      * @param string The path to the language files. You can leave this blank if you need
  4755      *               as from version 1.0.7 the path should be automatically detected
  4756      * @param boolean Whether to return the result or to echo
  4757      * @return string The code highlighted (if $return is true)
  4758      * @since 1.0.2
  4759      */
  4760     function geshi_highlight($string, $language, $path = null, $return = false) {
  4761         $geshi = new GeSHi($string, $language, $path);
  4762         $geshi->set_header_type(GESHI_HEADER_NONE);
  4763 
  4764         if ($return) {
  4765             return '<code>' . $geshi->parse_code() . '</code>';
  4766         }
  4767 
  4768         echo '<code>' . $geshi->parse_code() . '</code>';
  4769 
  4770         if ($geshi->error()) {
  4771             return false;
  4772         }
  4773         return true;
  4774     }
  4775 }
  4776 
  4777 ?>


Download geshi/geshi.php

History
Sun, 9 Dec 2018 23:32:58 +0100	Jan Dankert	Fix: Geshi PHP7-fähig
Thu, 20 Oct 2016 00:06:06 +0200	Jan Dankert	Aktuelle Geshi-Version installiert.
Fri, 23 Nov 2007 00:12:00 +0100	dankert	Farbige Darstellung von Code-Blöcken mit Hilfe der Bibliothek GESHI.