commit 56259b72331a2cad3225e8e5112b3ec35597d2c1
parent 318caa95b022980e781404468359340746bc321e
Author: dankert <devnull@localhost>
Date: Tue, 28 Dec 2010 01:35:02 +0100
Fix: Kyrillische UTF-8-Zeichen (2-byte) HTML-kodieren
Diffstat:
1 file changed, 74 insertions(+), 20 deletions(-)
diff --git a/functions/language.inc.php b/functions/language.inc.php
@@ -90,31 +90,85 @@ function langHtml( $key,$vars = array() ) {
function encodeHtml($text)
{
$charset = lang('CHARSET');
- if ( in_array( $charset, array('ISO-8859-1','ISO-8859-15','UTF-8')) )
- {
- return htmlentities($text,ENT_QUOTES,$charset );
- }
- else
+
+ // Encode $text for HTML output, depending on the charset reported by lang('CHARSET').
+ switch( $charset )
{
- return $text;
- // TODO: Was tun mit ISO-8859-5 (kyryllisch)?
-
- $t2 = '';
-
- for ( $i=0;$i<strlen($text);$i++)
- {
- $o = ord($text[$i]);
- if ( $o <= 127 )
- $t2 .= $text[$i];
- else
- $t2 .= '&#'.$o.';';
- }
-
- return $t2;
+ case 'ISO-8859-1':
+ case 'ISO-8859-15':
+ // Single-byte Latin charsets: htmlentities() covers the special characters and all non-ASCII characters.
+ return htmlentities($text,ENT_QUOTES,$charset );
+ case 'UTF-8':
+ // translateutf8tohtml() copies bytes 0..127 through unchanged, so the HTML
+ // special characters must be escaped first (same replacements as ENT_QUOTES).
+ // strtr() works byte-wise in a single pass; this is safe for UTF-8 because
+ // ASCII byte values never occur inside a multi-byte sequence, and unlike
+ // htmlspecialchars() it does not return an empty string on malformed input.
+ $escaped = strtr( $text, array(
+ '&' => '&amp;',
+ '<' => '&lt;',
+ '>' => '&gt;',
+ '"' => '&quot;',
+ "'" => '&#039;' ) );
+ // Afterwards every non-ASCII character becomes a numeric character reference.
+ return translateutf8tohtml($escaped);
+ default:
+ // Any other charset: the text is returned unchanged (no escaping is applied).
+ return $text;
}
}
+
+
+// Source: http://de.php.net/manual/de/function.htmlentities.php#96648
+// Thx to silverbeat!
+// When using UTF-8 as a charset, htmlentities will only convert 1-byte and 2-byte characters.
+// Use this function if you also want to convert 3-byte and 4-byte characters:
+// converts a UTF8-string into HTML entities
+ /**
+  * Replaces every non-ASCII character of a UTF-8 byte string with a
+  * hexadecimal numeric character reference ("&#x...;").
+  *
+  * - Bytes 0..127 are copied through unchanged. The HTML special characters
+  *   (&, <, >, ", ') are NOT escaped by this function.
+  * - Well-formed 2-, 3- and 4-byte sequences (U+0080..U+10FFFF, without
+  *   overlong forms and surrogates) are decoded to their code point and
+  *   written as lowercase hex, zero-padded to an even number of digits.
+  * - A byte that does not start a well-formed sequence is written on its own
+  *   as "&#xHH;" (its byte value); decoding resumes with the next byte, so
+  *   following characters are never swallowed.
+  *
+  * @param string $txt UTF-8 encoded input
+  * @return string the input with all bytes >= 128 replaced by references
+  */
+ function translateutf8tohtml($txt) {
+     $txt    = (string) $txt;
+     $length = strlen($txt); // byte length, evaluated once
+     $out    = '';
+
+     for ($i = 0; $i < $length; $i++) {
+         $lead = ord($txt[$i]);
+
+         if ($lead < 0x80) {
+             // 0..127: raw
+             $out .= $txt[$i];
+             continue;
+         }
+
+         // Classify the lead byte: number of continuation bytes, payload bits
+         // of the lead byte and the smallest code point that legitimately
+         // needs this sequence length (anything smaller is an overlong form).
+         if ($lead >= 0xc2 && $lead <= 0xdf) {
+             $extra = 1;
+             $code  = $lead & 0x1f;
+             $min   = 0x80;
+         } elseif ($lead >= 0xe0 && $lead <= 0xef) {
+             $extra = 2;
+             $code  = $lead & 0x0f;
+             $min   = 0x800;
+         } elseif ($lead >= 0xf0 && $lead <= 0xf4) {
+             // 0xf4 is included: it starts U+100000..U+10FFFF
+             $extra = 3;
+             $code  = $lead & 0x07;
+             $min   = 0x10000;
+         } else {
+             // stray continuation byte (0x80..0xbf) or a byte that can never
+             // start a sequence (0xc0, 0xc1, 0xf5..0xff)
+             $extra = 0;
+             $code  = $lead;
+             $min   = 0;
+         }
+
+         // All continuation bytes must exist and match the bit pattern 10xxxxxx.
+         $valid = $extra > 0 && $i + $extra < $length;
+         for ($k = 1; $valid && $k <= $extra; $k++) {
+             $next = ord($txt[$i + $k]);
+             if (($next & 0xc0) === 0x80) {
+                 $code = ($code << 6) | ($next & 0x3f);
+             } else {
+                 $valid = false;
+             }
+         }
+
+         // Reject overlong encodings, surrogates and values beyond U+10FFFF.
+         if ($valid && ($code < $min || $code > 0x10ffff || ($code >= 0xd800 && $code <= 0xdfff))) {
+             $valid = false;
+         }
+
+         if ($valid) {
+             // skip the continuation bytes that were just consumed
+             $i += $extra;
+         } else {
+             // don't know ... just encode this single byte
+             $code = $lead;
+         }
+
+         $hex = dechex($code);
+         if (strlen($hex) % 2) {
+             // pad to an even number of hex digits
+             $hex = '0' . $hex;
+         }
+         $out .= '&#x' . $hex . ';';
+     }
+
+     return $out;
+ }
+
+
+
+
+
+
+
/**
* Diese Funktion prueft, ob ein Sprachelement vorhanden ist
*