openrat-cms

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

Text.class.php (13030B)


      1 <?php
      2 // OpenRat Content Management System
      3 // Copyright (C) 2002 Jan Dankert, jandankert@jandankert.de
      4 //
      5 // This program is free software; you can redistribute it and/or
      6 // modify it under the terms of the GNU General Public License
      7 // as published by the Free Software Foundation; either version 2
      8 // of the License, or (at your option) any later version.
      9 //
     10 // This program is distributed in the hope that it will be useful,
     11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13 // GNU General Public License for more details.
     14 //
     15 // You should have received a copy of the GNU General Public License
     16 // along with this program; if not, write to the Free Software
     17 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
     18 
     19 
     20 namespace util;
     21 
     22 use cms\base\Configuration;
     23 
     24 /**
     25  * Nuetzliche Funktionen fuer das Bearbeiten von Texten/Zeichenketten
     26  * @author $Author$
     27  * @version $Revision$
     28  * @package openrat.services
     29  */
     30 class Text
     31 {
     32     const ARROW_RIGHT = "\xE2\x86\x92";
     33 	const FILE_SEP = " \xE2\x86\x92 ";
     34 
     35 
     36 	/**
     37 	 *
     38 	 * @param unknown $key
     39 	 * @param unknown $text
     40 	 * @return string|unknown
     41 	 */
     42 	public static function accessKey($key, $text)
     43 	{
     44 		$pos = strpos(strtolower($text), strtolower($key));
     45 
     46 		if ($pos !== false)
     47 			return substr($text, 0, max($pos, 0)) . '<span class="accesskey">' . substr($text, $pos, 1) . '</span>' . substr($text, $pos + 1);
     48 		else
     49 			return $text;
     50 	}
     51 
     52 
     53 	/**
     54 	 * Einen Text auf eine bestimmte Laenge begrenzen.
     55 	 *
     56 	 * Ist der Text zu lang, so wird abgeschnitten und
     57 	 * eine Zeichenkette angehaengt.
     58 	 *
     59 	 * @param String Text, der zu begrenzen ist
     60 	 * @param Integer maximale Laenge des Textes (optional)
     61 	 * @param Text, der an gekuerzten Text angehangen wird (optional)
     62 	 */
     63 	public static function maxLength($text, $laenge = 20, $append = '...', $where = STR_PAD_RIGHT)
     64 	{
     65 		if (strlen($text) > $laenge) {
     66 			if ($where == STR_PAD_RIGHT)
     67 				$text = substr($text, 0, $laenge) . $append;
     68 			elseif ($where == STR_PAD_BOTH)
     69 				$text = substr($text, 0, $laenge / 2) . $append . substr($text, strlen($text) - ($laenge / 2));
     70 		}
     71 
     72 		return $text;
     73 	}
     74 
     75 
     76 	/**
     77 	 * Umwandeln von BB-Code in Wiki-Textauszeichnungen
     78 	 *
     79 	 * @param text zu bearbeitender Text
     80 	 *
     81 	 * @return String Ausgabe
     82 	 */
     83 	public static function bbCode2Wiki($inhalt)
     84 	{
     85 		$inhalt = preg_replace('/\[b\]([^\[]*)\[\/b\]/i', '*\\1*', $inhalt);
     86 		$inhalt = preg_replace('/\[i\]([^\[]*)\[\/i\]/i', '_\\1_', $inhalt);
     87 		$inhalt = preg_replace('/\[code\]([^\[]*)\[\/code\]/i', '=\\1=', $inhalt);
     88 
     89 		$inhalt = preg_replace('/\[url\]([^\[]*)\[\/url\]/i', '"\\1"->"\\1"', $inhalt);
     90 		$inhalt = preg_replace('/\[url=([^\[]*)\]([^\[]*)\[\/url\]/i', '"\\2"->"\\1"', $inhalt);
     91 
     92 		return $inhalt;
     93 	}
     94 
     95 
     96 	/**
     97 	 * Umwandeln von einfachen HTML-Befehlen in Wiki-Textauszeichnungen
     98 	 *
     99 	 * @param text zu bearbeitender Text
    100 	 *
    101 	 * @return String Ausgabe
    102 	 */
    103 	public static function Html2Wiki($inhalt)
    104 	{
    105 		$inhalt = preg_replace('/<b(.*)>(.*)<\/b>/i', '*\\2*', $inhalt);
    106 		$inhalt = preg_replace('/<i(.*)>(.*)<\/i>/i', '_\\2_', $inhalt);
    107 		$inhalt = preg_replace('/<a(.*)href="(.*)">(.*)<\/a>/i', '"\\3"->"\\2"', $inhalt);
    108 
    109 		return $inhalt;
    110 	}
    111 
    112 
    113 	/**
    114 	 * HTML-Entitaeten fuer HTML-Tags verwenden
    115 	 *
    116 	 * @param String Text, in dem HTML-Tags umgewandelt werden sollen
    117 	 * @return String Ausgabe
    118 	 */
    119 	public static function encodeHtml($inhalt)
    120 	{
    121 		//$inhalt = str_replace('&','&amp;',$inhalt);
    122 		$inhalt = str_replace('"', '&quot;', $inhalt);
    123 		$inhalt = str_replace('<', '&lt;', $inhalt);
    124 		$inhalt = str_replace('>', '&gt;', $inhalt);
    125 
    126 		return $inhalt;
    127 	}
    128 
    129 
    130 	/**
    131 	 * Ersetzt Sonderzeichen durch HTML-�quivalente.<br>
    132 	 * Z.B. Ersetzt "(c)" durch "&copy;".
    133 	 */
    134 	public static function replaceHtmlChars($text)
    135 	{
    136 		$htmlConfig = Configuration::subset(['editor','html']);
    137 
    138 		foreach ( $htmlConfig->get('replace',[]) as $repl) {
    139 			list($ersetze, $mit) = explode(':', $repl . ':');
    140 			$text = str_replace($ersetze, $mit, $text);
    141 		}
    142 
    143 		return $text;
    144 	}
    145 
    146 
    147 	/**
    148 	 * HTML-Entitaeten fuer HTML-Tags verwenden
    149 	 *
    150 	 * @param String Text, in dem HTML-Tags umgewandelt werden sollen
    151 	 * @return String Ausgabe
    152 	 */
    153 	public static function encodeHtmlSpecialChars($inhalt)
    154 	{
    155 		return Text::replaceHtmlChars($inhalt);
    156 	}
    157 
    158 
    159 	const DIFF_NEW     = 'new';
    160 	const DIFF_OLD     = 'old';
    161 	const DIFF_EQUAL   = 'equal';
    162 	const DIFF_CHANGED = 'notequal';
    163 	const DIFF_EMPTY   = 'empty';
    164 
    165 	/**
    166 	 * Vergleicht 2 Text-Arrays und ermittelt eine Darstellung der Unterschiede
    167 	 * @param $from_text array text lines
    168 	 * @param $to_text   array text lines
    169 	 * @return array[] an array containing 2 arrays with the same length
    170 	 */
    171 	public static function diff($from_text, $to_text)
    172 	{
    173 		/**
    174 		 * Creating a diff entry
    175 		 * @param $text
    176 		 * @param $line
    177 		 * @param $type
    178 		 * @return array
    179 		 */
    180 		$createEntry = function($text, $line, $type) {
    181 			return [
    182 				'text' => $text,
    183 				'line' => $line,
    184 				'type' => $type,
    185 			];
    186 		};
    187 		$emptyEntry = $createEntry(null,null,self::DIFF_EMPTY);
    188 
    189 		// Zaehler pro Textarray
    190 		$pos_from = -1;
    191 		$pos_to   = -1;
    192 
    193 		// Ergebnis-Arrays
    194 		$from_out = [];
    195 		$to_out   = [];
    196 
    197 		while (true) {
    198 			$pos_from++;
    199 			$pos_to++;
    200 
    201 			if (!isset($from_text[$pos_from]) &&
    202 				!isset($to_text  [$pos_to])) {
    203 				// Text in ist 'neu' und 'alt' zuende. Ende der Schleife.
    204 				break;
    205 			} elseif
    206 			(isset($from_text[$pos_from]) &&
    207 				!isset($to_text  [$pos_to])) {
    208 				// Text in 'neu' ist zuende, die Restzeilen von 'alt' werden ausgegeben
    209 				while (isset($from_text[$pos_from])) {
    210 					$from_out[] = $createEntry( $from_text[$pos_from],$pos_from + 1, self::DIFF_OLD);
    211 					$to_out  [] = $emptyEntry;
    212 					$pos_from++;
    213 				}
    214 				break;
    215 			} elseif
    216 			(!isset($from_text[$pos_from]) &&
    217 				isset($to_text  [$pos_to])) {
    218 				// Umgekehrter Fall: Text in 'alt' ist zuende, Restzeilen aus 'neu' werden ausgegeben
    219 				while (isset($to_text[$pos_to])) {
    220 					$from_out[] = $emptyEntry;
    221 					$to_out  [] = $createEntry($to_text[$pos_to], $pos_to + 1, self::DIFF_NEW);
    222 					$pos_to++;
    223 				}
    224 				break;
    225 			} elseif( rtrim($from_text[$pos_from]) != rtrim($to_text[$pos_to]) ) {
    226 				// Zeilen sind vorhanden, aber ungleich
    227 				// Wir suchen jetzt die naechsten beiden Zeilen, die gleich sind.
    228 				$max_entf = min(count($from_text) - $pos_from - 1, count($to_text) - $pos_to - 1);
    229 
    230 				for ($a = 0; $a <= $max_entf; $a++) {
    231 					for ($b = 0; $b <= $max_entf; $b++) {
    232 						if (trim($from_text[$pos_from + $b]) != '' &&
    233 							$from_text[$pos_from + $b] == $to_text[$pos_to + $a]) {
    234 							$pos_gef_from = $pos_from + $b;
    235 							$pos_gef_to = $pos_to + $a;
    236 							break;
    237 						}
    238 
    239 						if (trim($from_text[$pos_from + $a]) != '' &&
    240 							$from_text[$pos_from + $a] == $to_text[$pos_to + $b]) {
    241 							$pos_gef_from = $pos_from + $a;
    242 							$pos_gef_to = $pos_to + $b;
    243 							break;
    244 						}
    245 					}
    246 
    247 					if ($b <= $max_entf) {
    248 						break;
    249 					}
    250 				}
    251 
    252 				if ($a <= $max_entf) {
    253 					// Gleiche Zeile gefunden
    254 
    255 					if ($pos_gef_from - $pos_from == 0)
    256 						$type = self::DIFF_NEW;
    257 					elseif
    258 					($pos_gef_to - $pos_to == 0)
    259 						$type = self::DIFF_OLD;
    260 					else
    261 						$type = self::DIFF_CHANGED;
    262 
    263 					while ($pos_gef_from - $pos_from > 0 &&
    264 						$pos_gef_to - $pos_to > 0) {
    265 						$from_out[] = $createEntry($from_text[$pos_from], $pos_from + 1, $type);
    266 						$to_out  [] = $createEntry($to_text  [$pos_to  ], $pos_to + 1, $type);
    267 
    268 						$pos_from++;
    269 						$pos_to++;
    270 					}
    271 
    272 					while ($pos_gef_from - $pos_from > 0) {
    273 						$from_out[] = $createEntry($from_text[$pos_from], $pos_from + 1, $type);
    274 						$to_out  [] = $emptyEntry;
    275 						$pos_from++;
    276 					}
    277 
    278 					while ($pos_gef_to - $pos_to > 0) {
    279 						$from_out[] = $emptyEntry;
    280 						$to_out  [] = $createEntry($to_text  [$pos_to], $pos_to + 1, $type);
    281 						$pos_to++;
    282 					}
    283 					$pos_from--;
    284 					$pos_to--;
    285 				} else {
    286 					// Keine gleichen Zeilen gefunden
    287 
    288 					while (true) {
    289 						if (!isset($from_text[$pos_from]) &&
    290 							!isset($to_text  [$pos_to  ])) {
    291 							break;
    292 						} elseif
    293 						(isset($from_text[$pos_from]) &&
    294 							!isset($to_text  [$pos_to])) {
    295 							$from_out[] = array($from_text[$pos_from], $pos_from + 1, self::DIFF_CHANGED);
    296 							$to_out  [] = $emptyEntry;
    297 						} elseif
    298 						(!isset($from_text[$pos_from]) &&
    299 							isset($to_text  [$pos_to])) {
    300 							$from_out[] = $emptyEntry;
    301 							$to_out  [] = $createEntry($to_text  [$pos_to]  , $pos_to   + 1, self::DIFF_CHANGED);
    302 						} else {
    303 							$from_out[] = $createEntry($from_text[$pos_from], $pos_from + 1, self::DIFF_CHANGED);
    304 							$to_out  [] = $createEntry($to_text  [$pos_to]  , $pos_to   + 1, self::DIFF_CHANGED);
    305 						}
    306 						$pos_from++;
    307 						$pos_to++;
    308 					}
    309 				}
    310 			} else {
    311 				// Zeilen sind gleich
    312 				$from_out[] = $createEntry($from_text[$pos_from], $pos_from + 1, self::DIFF_EQUAL);
    313 				$to_out  [] = $createEntry($to_text  [$pos_to]  , $pos_to   + 1, self::DIFF_EQUAL);
    314 			}
    315 		}
    316 
    317 		return ( [$from_out, $to_out] );
    318 	}
    319 
    320 
    321 	/**
    322 	 * Saeubert eine Zeichenkette.
    323 	 *
    324 	 *  Es werden ungueltige Zeichen aus einer Zeichenkette entfernt. Es wird mit einer Whitelist
    325 	 *  gearbeitet, d.h. die erlaubten Zeichen werden angegeben.
    326 	 *
    327 	 * @param $eingabe Die Eingabe-Zeichenkette, aus der ungueltige Zeichen entfernt werden sollen.
    328 	 * @param $erlaubt Die erlaubten Zeichen (eine "White-List")
    329 	 * @return String die aufgeräumte Zeichenkette
    330 	 */
    331 	public static function clean($eingabe, $erlaubt)
    332 	{
    333 		$first = strtr($eingabe, $erlaubt, str_repeat("\x01", strlen($erlaubt)));
    334 		$second = strtr($eingabe, $first, str_repeat("\x00", strlen($first)));
    335 		return str_replace("\x00", '', $second);
    336 	}
    337 
    338 
    339 	/**
    340 	 * Searches for Object-Ids in a text.
    341 	 * Searches in the provided text for URLs with "__OID__nnn__", where nnn is an object id.
    342 	 * @param $text
    343 	 * @return array
    344 	 */
    345 	public static function parseOID($text)
    346 	{
    347 		$oids = array();
    348 		$treffer = array();
    349 
    350 		// This are all chars which are used in our URLs.
    351 		// Sure, there are more, but not used by this system.
    352 		$urlChars = '[A-Za-z0-9_.:,\/=+&?-]';
    353 
    354 		preg_match_all('/(' . $urlChars . '*)__OID__([0-9]+)__(' . $urlChars . '*)/', $text, $treffer, PREG_SET_ORDER);
    355 
    356 		foreach ($treffer as $t) {
    357 
    358 			$id = $t[2];
    359 			$match = $t[0];
    360 
    361 			if (!isset($oids[$id]))
    362 				$oids[$id] = array();
    363 
    364 			$oids[$id][] = $match;
    365 		}
    366 
    367 		return $oids;
    368 	}
    369 
    370 
    371 
    372 
    373 	// Source: http://de.php.net/manual/de/function.htmlentities.php#96648
    374 	// Thx to silverbeat!
    375 	// When using UTF-8 as a charset, htmlentities will only convert 1-byte and 2-byte characters.
    376 	// Use this function if you also want to convert 3-byte and 4-byte characters:
    377 	// converts a UTF8-string into HTML entities
    378 	public static function translateutf8tohtml($txt)
    379 	{
    380 		//$txt = html_entity_decode($txt);
    381 		$txt2 = '';
    382 		for ($i = 0; $i < strlen($txt); $i++) {
    383 			$o = ord($txt[$i]);
    384 			if ($o < 128) {
    385 				// 0..127: raw
    386 				$txt2 .= $txt[$i];
    387 			} else {
    388 				$o1 = 0;
    389 				$o2 = 0;
    390 				$o3 = 0;
    391 				if ($i < strlen($txt) - 1) $o1 = ord($txt[$i + 1]);
    392 				if ($i < strlen($txt) - 2) $o2 = ord($txt[$i + 2]);
    393 				if ($i < strlen($txt) - 3) $o3 = ord($txt[$i + 3]);
    394 				$hexval = 0;
    395 				if ($o >= 0xc0 && $o < 0xc2) {
    396 					// INVALID --- should never occur: 2-byte UTF-8 although value < 128
    397 					$hexval = $o1;
    398 					$i++;
    399 				} elseif ($o >= 0xc2 && $o < 0xe0 && $o1 >= 0x80) {
    400 					// 194..223: 2-byte UTF-8
    401 					$hexval |= ($o & 0x1f) << 6;   // 1. byte: five bits of 1. char
    402 					$hexval |= ($o1 & 0x3f);   // 2. byte: six bits of 2. char
    403 					$i++;
    404 				} elseif ($o >= 0xe0 && $o < 0xf0 && $o1 >= 0x80 && $o2 >= 0x80) {
    405 					// 224..239: 3-byte UTF-8
    406 					$hexval |= ($o & 0x0f) << 12;  // 1. byte: four bits of 1. char
    407 					$hexval |= ($o1 & 0x3f) << 6;  // 2.+3. byte: six bits of 2.+3. char
    408 					$hexval |= ($o2 & 0x3f);
    409 					$i += 2;
    410 				} elseif ($o >= 0xf0 && $o < 0xf4 && $o1 >= 0x80) {
    411 					// 240..244: 4-byte UTF-8
    412 					$hexval |= ($o & 0x07) << 18; // 1. byte: three bits of 1. char
    413 					$hexval |= ($o1 & 0x3f) << 12; // 2.-4. byte: six bits of 2.-4. char
    414 					$hexval |= ($o2 & 0x3f) << 6;
    415 					$hexval |= ($o3 & 0x3f);
    416 					$i += 3;
    417 				} else {
    418 					// don't know ... just encode
    419 					$hexval = $o;
    420 				}
    421 				$hexstring = dechex($hexval);
    422 				if (strlen($hexstring) % 2) $hexstring = '0' . $hexstring;
    423 				$txt2 .= '&#x' . $hexstring . ';';
    424 			}
    425 		}
    426 		$result = $txt2;
    427 
    428 		return $result;
    429 	}
    430 
    431 
    432 	public static function hexDump( $data, $newline="\n")
    433 	{
    434 		$width =  16; # number of bytes per line
    435 		$pad   = '.'; # padding for non-visible characters
    436 
    437 		$from   = '';
    438 		$to     = '';
    439 		$output = '';
    440 
    441 		for ($i=0; $i<=0xFF; $i++)
    442 		{
    443 			$from .= chr($i);
    444 			$to   .= ($i >= 0x20 && $i <= 0x7E) ? chr($i) : $pad;
    445 		}
    446 
    447 		$hex   = str_split(bin2hex($data), $width*2);
    448 		$chars = str_split(strtr($data, $from, $to), $width);
    449 
    450 		foreach ($hex as $i=>$line)
    451 			$output .=
    452 				implode('  ',array_pad(str_split($chars[$i]),16,' ')     ) . '   ['.str_pad($chars[$i],16).']' . $newline .
    453 				implode(' ' ,array_pad(str_split($line ,2),16,'  ') ) . $newline;
    454 		return $output;
    455 	}
    456 
    457 
    458 
    459 }
    460