00001 <?php
00002
00011 lt_include( PLOG_CLASS_PATH."class/data/stringutils.class.php" );
00012
/**
 * Fallback word separator. It is handed to $config->getValue() as the second
 * argument when the "urlize_word_separator" setting is read by
 * TextFilter::urlize() and TextFilter::slugify().
 */
define( 'URLIZE_WORD_SEPARATOR_DEFAULT', '_' );
00018
00025 class TextFilter
00026 {
00027
/**
 * Value of the "html_allowed_tags_in_comments" configuration setting.
 * filterHTML() passes it to strip_tags() as the list of tags to keep.
 */
var $htmlAllowedTags;
00029
/**
 * Constructor. Reads the "html_allowed_tags_in_comments" setting from the
 * global configuration and stores it in $this->htmlAllowedTags.
 *
 * Declared as __construct() so that it is actually invoked by "new" on
 * PHP versions that no longer treat a method named after the class as a
 * constructor.
 */
function __construct()
{
    lt_include( PLOG_CLASS_PATH."class/config/config.class.php" );
    $config =& Config::getConfig();
    $this->htmlAllowedTags = $config->getValue( "html_allowed_tags_in_comments" );
}

/**
 * Legacy class-named constructor, kept so that existing code calling
 * TextFilter() explicitly (and interpreters that only know class-named
 * constructors) keeps working. It simply delegates to __construct().
 */
function TextFilter()
{
    $this->__construct();
}
00039
/**
 * Removes script content from the given text.
 *
 * The text is first run through Textfilter::htmlDecode(). Then, in this
 * order, the following are removed:
 *  - <script>...</script> blocks,
 *  - the listed on* event handler attributes whose value is quoted,
 *  - quoted strings starting with "Javascript:".
 * All three matches are case-insensitive and may span several lines.
 *
 * @param text The text to clean up.
 * @return The decoded text without the matched fragments.
 */
function filterJavaScript( $text )
{
    $decoded = Textfilter::htmlDecode( $text );

    // applied one after the other, each on the result of the previous one
    $unwanted = array(
        '/<SCRIPT.*?<\/SCRIPT>/ims',
        '/on(Load|Click|DblClick|DragStart|KeyDown|KeyPress|KeyUp|MouseDown|MouseMove|MouseOut|MouseOver|SelectStart|Blur|Focus|Scroll|Select|Unload|Change|Submit)\s*=\s*(\'|").*?\\2/smi',
        '/(\'|")Javascript:.*?\\1/smi'
    );

    return preg_replace( $unwanted, '', $decoded );
}
00070
/**
 * Strips every tag that is not listed in $this->htmlAllowedTags and then
 * passes the result through filterJavaScript().
 *
 * @param string The text to filter.
 * @return The filtered text.
 */
function filterHTML( $string )
{
    return $this->filterJavaScript( strip_tags( $string, $this->htmlAllowedTags ) );
}
00088
/**
 * Strips all tags from the text, passes the result through
 * filterJavaScript() and trims surrounding whitespace.
 *
 * @param string The text to filter.
 * @return The trimmed, tag-free text.
 */
function filterAllHTML( $string )
{
    $withoutTags = strip_tags( $string );

    return trim( Textfilter::filterJavaScript( $withoutTags ) );
}
00105
/**
 * Converts all applicable characters of the string to HTML entities by
 * means of htmlentities() with its default arguments.
 *
 * @param string The text to encode.
 * @return The encoded text.
 */
function filterHTMLEntities( $string )
{
    $encoded = htmlentities( $string );

    return $encoded;
}
00116
/**
 * Escapes a string for inclusion in XML content.
 *
 * Only the characters that are special in XML (&, <, >, " and ') are
 * escaped. Named HTML entities such as &eacute; are deliberately not
 * produced, because XML does not define them and a parser would reject the
 * document; all other characters are passed through untouched.
 *
 * @param string The text to escape.
 * @return The escaped text.
 */
function filterXMLEntities( $string )
{
    // ENT_QUOTES: the apostrophe is emitted as a numeric reference (&#039;),
    // which is valid in XML
    return htmlspecialchars( $string, ENT_QUOTES );
}
00126
/**
 * Removes every occurrence of the given substrings from a string. The
 * substrings are removed one after the other, in array order.
 *
 * @param string The text to process.
 * @param characters Array with the substrings to remove.
 * @return The text without the given substrings.
 */
function filterCharacters( $string, $characters = Array())
{
    // nothing to remove: hand the input back untouched
    if( !is_array( $characters ) || count( $characters ) == 0 ) {
        return $string;
    }

    // with an array of needles str_replace() processes them in order
    return str_replace( $characters, "", $string );
}
00142
/**
 * Replaces plain ASCII punctuation with typographic characters (curly
 * quotes, dashes, ellipsis, primes, (tm)/(c)/(r) symbols and the
 * multiplication sign between numbers).
 *
 * The text is split into tags and the text between them. Tags are copied
 * as they are. The text chunk that follows a tag containing <code, <pre,
 * <kbd, <style or <script is copied untouched as well.
 *
 * Unless the user agent identifies itself as Gecko, <q> and </q> are
 * replaced with curly double quotes.
 *
 * @param text The text to process.
 * @return The processed text; an empty string for empty input.
 */
function texturize($text)
{
    $output = '';

    // the user agent does not change while we loop, so look at it only once;
    // it may be missing altogether (e.g. when not serving an HTTP request)
    $isGecko = isset($_SERVER['HTTP_USER_AGENT'])
        && strpos($_SERVER['HTTP_USER_AGENT'], 'Gecko') !== false;

    $textarr = preg_split("/(<.*>)/U", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
    $stop = count($textarr);
    $next = true;
    for ($i = 0; $i < $stop; $i++) {
        $curl = $textarr[$i];
        if (!$isGecko) {
            $curl = str_replace('<q>', '“', $curl);
            $curl = str_replace('</q>', '”', $curl);
        }

        // substr() is safe on the empty chunks that preg_split() produces,
        // which a direct offset access is not
        $isTag = (substr($curl, 0, 1) === '<');

        if (!$isTag && $next) {
            $curl = str_replace('---', '—', $curl);
            $curl = str_replace('--', '–', $curl);
            $curl = str_replace("...", '…', $curl);
            $curl = str_replace('``', '“', $curl);

            $curl = preg_replace("/'s/", "’s", $curl);
            $curl = preg_replace("/'(\d\d(?:’|')?s)/", "’$1", $curl);
            $curl = preg_replace('/(\s|\A|")\'/', '$1‘', $curl);
            $curl = preg_replace("/(\d+)\"/", "$1″", $curl);
            $curl = preg_replace("/(\d+)'/", "$1′", $curl);
            $curl = preg_replace("/(\S)'([^'\s])/", "$1’$2", $curl);
            $curl = preg_replace('/"([\s.]|\Z)/', '”$1', $curl);
            $curl = preg_replace('/(\s|\A)"/', '$1“', $curl);
            $curl = preg_replace("/'([\s.]|\Z)/", '’$1', $curl);
            $curl = preg_replace("/\(tm\)/i", '™', $curl);
            $curl = preg_replace("/\(c\)/i", '©', $curl);
            $curl = preg_replace("/\(r\)/i", '®', $curl);

            $curl = str_replace("''", '”', $curl);
            // NOTE(review): as written this replacement puts back exactly what
            // was matched; it looks like the replacement was meant to be an
            // escaped ampersand - confirm against the upstream source
            $curl = preg_replace('/&([^#])(?![a-z]{2,8};)/', '&$1', $curl);

            // digits on both sides of the "x", e.g. 800x600
            $curl = preg_replace('/(\d+)x(\d+)/', "$1×$2", $curl);
        } elseif (strpos($curl, '<code') !== false
                  || strpos($curl, '<pre') !== false
                  || strpos($curl, '<kbd') !== false
                  || strpos($curl, '<style') !== false
                  || strpos($curl, '<script') !== false) {
            // do not touch the text chunk that comes right after this tag
            $next = false;
        } else {
            $next = true;
        }
        $output .= $curl;
    }

    return $output;
}
00192
/**
 * Turns plain-text line breaks into paragraph and line-break markup.
 *
 * Line endings are unified to "\n", runs of blank lines are collapsed and
 * every block separated by a blank line is wrapped in <p>...</p>. The
 * paragraph tags placed directly around table, ol, ul, pre, select, form
 * and blockquote tags are removed again.
 *
 * @param pee The text to convert.
 * @param br When true, remaining single line breaks that do not directly
 * follow a </p> are turned into <br />.
 * @return The converted text.
 */
function autoP($pee, $br=1)
{
    // normalise line endings, squeeze blank lines, wrap the blocks in <p>
    // and unwrap the block-level tags listed in the last two patterns
    $html = preg_replace(
        array(
            "/(\r\n|\n|\r)/",
            "/\n\n+/",
            '/\n?(.+?)(\n\n|\z)/s',
            '/<p>(<(?:table|[ou]l|pre|select|form|blockquote)>)/',
            '!(</?(?:table|[ou]l|pre|select|form|blockquote)>)</p>!'
        ),
        array(
            "\n",
            "\n\n",
            "<p>$1</p>\n",
            "$1",
            "$1"
        ),
        $pee
    );

    if ($br) {
        $html = preg_replace('|(?<!</p>)\s*\n|', "<br />\n", $html);
    }

    // no line break right after the closing tag of a block-level element
    $html = preg_replace('!(</(?:table|[ou]l|pre|select|form|blockquote)>)<br />!', "$1", $html);

    // collapse doubled paragraph tags, opening ones first
    return str_replace(array('<p><p>', '</p></p>'), array('<p>', '</p>'), $html);
}
00216
/**
 * Decodes the HTML entities found in a string.
 *
 * Numeric references are handled first: hexadecimal ones (&#x41;), then
 * decimal ones (&#65;). Each is replaced with the single byte returned by
 * chr(), so only the lowest 8 bits of the referenced code are kept.
 * Afterwards the named entities from get_html_translation_table() are
 * replaced. When the "db_character_set" setting is "utf8", the replacement
 * characters are passed through utf8_encode() first.
 *
 * The numeric references are decoded without evaluating any generated
 * code, and decimal references with leading zeros (&#065;) are read as
 * decimal numbers.
 *
 * @param htmlString The text to decode.
 * @param quote_style Passed on to get_html_translation_table(); one of
 * the ENT_* quote constants.
 * @return The decoded text.
 */
function htmlDecode( $htmlString, $quote_style = ENT_QUOTES )
{
    // pattern => numeric base of the captured digits; the order matters,
    // the decimal pass runs on the outcome of the hexadecimal one
    $numericReferences = array(
        '~&#x([0-9a-f]+);~i' => 16,
        '~&#([0-9]+);~'      => 10
    );
    foreach( $numericReferences as $pattern => $base ) {
        if( preg_match_all( $pattern, $htmlString, $found )) {
            $replacements = array();
            foreach( $found[0] as $idx => $entity ) {
                // chr() works on a single byte; keep the lowest 8 bits
                $replacements[$entity] = chr( intval( $found[1][$idx], $base ) & 0xFF );
            }
            // one pass over the string, replaced text is not scanned again
            $htmlString = strtr( $htmlString, $replacements );
        }
    }

    $trans_table = get_html_translation_table( HTML_ENTITIES, $quote_style );

    lt_include( PLOG_CLASS_PATH . "class/config/configfilestorage.class.php" );
    $config = new ConfigFileStorage();
    $new_trans_table = array();
    if( $config->getValue( 'db_character_set' ) == 'utf8' ) {
        // NOTE(review): utf8_encode() converts from ISO-8859-1, so this
        // assumes the translation table is returned in that encoding - confirm
        foreach ( $trans_table as $key => $value ){
            $new_trans_table[$value] = utf8_encode( $key );
        }
    } else {
        $new_trans_table = array_flip($trans_table);
    }

    return strtr( $htmlString, $new_trans_table );
}
00261
/**
 * Reduces a text to a normalized plain form.
 *
 * In this order: all HTML is removed with filterAllHtml(), the
 * POST_EXTENDED_TEXT_MODIFIER marker is removed, HTML entities are
 * decoded, every character other than A-Z, a-z, 0-9 and the underscore is
 * replaced with a blank, and runs of blanks are collapsed into one.
 *
 * @param text The text to normalize.
 * @return The normalized text.
 */
function normalizeText( $text )
{
    // presumably this file defines POST_EXTENDED_TEXT_MODIFIER - TODO confirm
    lt_include( PLOG_CLASS_PATH."class/dao/article.class.php" );

    $result = TextFilter::filterAllHtml( $text );

    $result = str_replace( POST_EXTENDED_TEXT_MODIFIER, "", $result );

    $result = TextFilter::htmlDecode( $result );

    // The pattern is written with PCRE delimiters, so it has to go through
    // preg_replace(); the POSIX function would look for literal slashes
    // around the character and therefore replace practically nothing.
    // NOTE(review): bytes of non-ASCII characters are replaced as well now
    // - confirm that this is acceptable for non-English content
    $result = preg_replace( "/[^A-Za-z0-9_]/", " ", $result );

    $result = preg_replace( "/ +/", " ", $result );

    return $result;
}
00290
/**
 * Balances the tags of an HTML fragment.
 *
 * The text is consumed tag by tag while a stack of the currently open tag
 * names is maintained:
 *  - a closing tag that matches the top of the stack is kept,
 *  - a closing tag that matches a tag deeper in the stack closes every tag
 *    above it as well,
 *  - a closing tag without a matching open tag is dropped,
 *  - br, img, hr and input get a trailing "/" added to their attributes,
 *  - an opening tag identical to the one on top of the stack (except div)
 *    first closes the previous one,
 *  - tags still open at the end of the text are closed.
 * Tag names are lower-cased in the output.
 *
 * @param text The text whose tags are balanced.
 * @param is_comment Not used by this implementation.
 * @return The balanced text.
 */
function balanceTags($text, $is_comment = 0)
{
    // $tagstack: names of the open tags; $stacksize: its size;
    // $tagqueue: tags waiting to be written before the next piece of text;
    // $newtext: the output built so far
    $tagstack = array(); $stacksize = 0; $tagqueue = ''; $newtext = '';

    # WP bug fix for comments - in case you REALLY meant to type '< !--'
    // NOTE(review): search and replacement are identical here, so this call
    // changes nothing as written - confirm the intended replacement
    $text = str_replace('< !--', '< !--', $text);
    # WP bug fix for LOVE <3 (and other situations with '<' before a number)
    // NOTE(review): the replacement re-inserts what was matched, so this call
    // changes nothing as written either - confirm the intended replacement
    $text = preg_replace('#<([0-9]{1})#', '<$1', $text);

    // $regex[0]: the whole tag, $regex[1]: the name with an optional leading
    // slash, $regex[2]: everything else up to the closing ">"
    while (preg_match("/<(\/?\w*)\s*([^>]*)>/",$text,$regex)) {
        // tags queued by the previous iteration go out first
        $newtext .= $tagqueue;

        // position and length of the tag within the remaining text
        $i = strpos($text,$regex[0]);
        $l = strlen($regex[0]);

        $tagqueue = '';

        if ($regex[1][0] == "/") {
            // closing tag
            $tag = strtolower(substr($regex[1],1));

            if($stacksize <= 0) {
                // nothing is open: drop the closing tag
                $tag = '';
            }
            else if ($tagstack[$stacksize - 1] == $tag) {
                // it closes the most recently opened tag
                $tag = '</' . $tag . '>';

                array_pop ($tagstack);
                $stacksize--;
            } else {
                // look for the matching open tag further down the stack
                for ($j=$stacksize-1;$j>=0;$j--) {
                    if ($tagstack[$j] == $tag) {
                        // close everything from the top down to the match
                        for ($k=$stacksize-1;$k>=$j;$k--){
                            $tagqueue .= '</' . array_pop ($tagstack) . '>';
                            $stacksize--;
                        }
                        break;
                    }
                }
                // the closing tags (if any) are in the queue already
                $tag = '';
            }
        } else {
            // opening tag
            $tag = strtolower($regex[1]);

            if((substr($regex[2],-1) == '/') || ($tag == '')) {
                // already self-closed, or no tag name: nothing to track
            }
            elseif ($tag == 'br' || $tag == 'img' || $tag == 'hr' || $tag == 'input') {
                // written as self-closing tags, never pushed on the stack
                $regex[2] .= '/';
            } else {
                // the same tag opened twice in a row (div excepted):
                // close the previous one first
                if (($stacksize > 0) && ($tag != 'div') && ($tagstack[$stacksize - 1] == $tag)) {
                    $tagqueue = '</' . array_pop ($tagstack) . '>';
                    $stacksize--;
                }
                // array_push() returns the new number of elements
                $stacksize = array_push ($tagstack, $tag);
            }

            // rebuild the tag from its lower-cased name and its attributes
            $attributes = $regex[2];
            if($attributes) {
                $attributes = ' '.$attributes;
            }
            $tag = '<'.$tag.$attributes.'>';

            // a queued closing tag has to come before this opening tag
            if ($tagqueue) {
                $tagqueue .= $tag;
                $tag = '';
            }
        }
        // copy the text in front of the tag, then the tag itself, and go on
        // with whatever follows the tag
        $newtext .= substr($text,0,$i) . $tag;
        $text = substr($text,$i+$l);
    }

    // flush the queue left over from the last iteration
    $newtext .= $tagqueue;

    // text after the last tag
    $newtext .= $text;

    // close the tags that are still open, innermost first; the loop ends as
    // soon as array_pop() yields a value that evaluates to false
    while($x = array_pop($tagstack)) {
        $newtext .= '</' . $x . '>';
    }

    // NOTE(review): the first call turns "< !--" into "<!--"; the second one
    // has identical search and replacement and changes nothing as written
    $newtext = str_replace("< !--","<!--",$newtext);
    $newtext = str_replace("< !--","< !--",$newtext);

    return $newtext;
}
00406
/**
 * Converts a string into a form that can be used as part of a URL.
 *
 * The string is lower-cased and runs of blanks are collapsed. The
 * characters ; / ? : @ & = + $ , and blanks become the word separator
 * (the "urlize_word_separator" setting, URLIZE_WORD_SEPARATOR_DEFAULT when
 * it is not set), and the listed accented vowels and the c-cedilla are
 * replaced with their unaccented letter. Every character other than a-z,
 * 0-9, the dot, the separator and - unless $domainize is set - the
 * underscore and the hyphen is then removed. Finally runs of the separator
 * are collapsed and separators at both ends are trimmed.
 *
 * The separator is escaped before it is used inside a regular expression,
 * so a separator that is special in a character class cannot break or
 * alter the patterns.
 *
 * @param string The text to convert.
 * @param domainize When true, hyphens and underscores are turned into the
 * separator too and dots at both ends are trimmed.
 * @return The converted text.
 */
function urlize( $string, $domainize = false )
{
    lt_include( PLOG_CLASS_PATH."class/config/config.class.php" );
    $config =& Config::getConfig();
    $separator = $config->getValue( "urlize_word_separator", URLIZE_WORD_SEPARATOR_DEFAULT );
    $quotedSeparator = preg_quote( $separator, '/' );

    // lower case, and a single blank between words
    $string = preg_replace( "/ +/", " ", strtolower($string) );

    // characters replaced with the separator
    $string = str_replace(array(';','/','?',':','@','&','=','+','$',','),
                          $separator, $string);

    // blanks become the separator, accented characters lose their accent
    $search  = array(' ', 'ä', 'ö', 'ü','é','è','à','ç', 'à', 'è', 'ì',
                     'ò', 'ù', 'á', 'é', 'í', 'ó', 'ú', 'ë', 'ï' );
    $replace = array( $separator, 'a','o','u','e','e','a','c', 'a', 'e', 'i',
                      'o', 'u', 'a', 'e', 'i', 'o', 'u', 'e', 'i' );
    if($domainize){
        $search[] = '-';
        $search[] = '_';
        $replace[] = $separator;
        $replace[] = $separator;
    }
    $string = str_replace($search, $replace, $string);

    // Characters that survive. The separator goes last so that a hyphen
    // can never end up forming a range with its neighbours.
    $good_characters = "a-z0-9.";
    if(!$domainize){
        $good_characters .= "_\\-";
    }
    $good_characters .= $quotedSeparator;
    $string = preg_replace( '/[^'.$good_characters.']/', '', $string );

    // collapse runs of the separator; an empty separator would produce an
    // invalid (empty) character class, so there is nothing to do then
    if( $quotedSeparator !== '' ) {
        $string = preg_replace( "/[".$quotedSeparator."]+/", $separator, $string );
    }

    $string = trim($string, $separator);
    if($domainize){
        $string = trim($string, ".");
    }

    return $string;
}
00467
/**
 * Shortcut for urlize() with its $domainize flag switched on.
 *
 * @param string The text to convert.
 * @return Whatever urlize() returns for the text.
 */
function domainize( $string )
{
    $domainize = true;

    return Textfilter::urlize( $string, $domainize );
}
00487
/**
 * Runs the string through the kses parser when the
 * "xhtml_converter_enabled" setting is on; otherwise the string is
 * returned as it came in.
 *
 * @param string The text to convert.
 * @return The result of kses::Parse(), or the unchanged text.
 */
function xhtmlize( $string )
{
    lt_include( PLOG_CLASS_PATH."class/config/config.class.php" );
    $config =& Config::getConfig();

    // converter switched off: nothing to do
    if( !$config->getValue( "xhtml_converter_enabled" )) {
        return $string;
    }

    lt_include( PLOG_CLASS_PATH."class/data/kses.class.php" );
    $aggressive = $config->getValue( "xhtml_converter_aggresive_mode_enabled" );
    $converter = new kses( true, $aggressive );

    return $converter->Parse( $string );
}
00515
00516
/**
 * Builds a slug out of a string.
 *
 * The set of valid characters is taken from between the first "[" and the
 * last "]" of what LinkParser::getValidTag("{postname}") returns; when
 * there are no brackets, "_0-9a-zA-Z.-" is used. HTML entities are
 * decoded and tags are stripped, every character outside that set becomes
 * the word separator (the "urlize_word_separator" setting), runs of the
 * separator are collapsed, separators at both ends are trimmed and the
 * result is lower-cased.
 *
 * The separator is escaped before it is used inside a regular expression,
 * so a separator that is special in a character class cannot break the
 * pattern.
 *
 * @param string The text to convert.
 * @return The slug.
 */
function slugify( $string ){
    lt_include( PLOG_CLASS_PATH."class/config/config.class.php" );
    lt_include( PLOG_CLASS_PATH.'class/net/linkparser.class.php' );

    $config =& Config::getConfig();
    $separator = $config->getValue( "urlize_word_separator", URLIZE_WORD_SEPARATOR_DEFAULT );

    // take the character class out of the expression used for {postname}
    $lp = new LinkParser("");
    $regexp = $lp->getValidTag("{postname}");
    $start_bracket = strpos($regexp, "[");
    $end_bracket = strrpos($regexp, "]");
    $validChars = false;
    if($start_bracket !== false && $end_bracket !== false){
        $validChars = substr($regexp, $start_bracket+1,
                             $end_bracket-$start_bracket-1);
    }

    // no usable character class found: fall back to a fixed one
    if($validChars === false){
        $validChars = "_0-9a-zA-Z.-";
    }

    // everything that is not valid becomes the separator
    $string = preg_replace("/[^".$validChars."]/", $separator, strip_tags(Textfilter::htmlDecode($string)));

    // collapse runs of the separator; an empty separator would produce an
    // invalid (empty) character class, so there is nothing to do then
    $quotedSeparator = preg_quote( $separator, '/' );
    if( $quotedSeparator !== '' ) {
        $string = preg_replace("/[".$quotedSeparator."]+/", $separator, $string);
    }

    $string = trim($string, $separator);

    $string = strtolower( $string );

    return $string;
}
00557
00558
/**
 * Applies stripslashes() to every value of an array, descending into
 * nested arrays. Keys are kept as they are.
 *
 * @param obj The array to process.
 * @return The array with the slashes stripped from its values.
 */
function recursiveStripSlashes($obj){
    foreach( $obj as $key => $item ) {
        $obj[$key] = is_array( $item )
            ? Textfilter::recursiveStripSlashes( $item )
            : stripslashes( $item );
    }

    return $obj;
}
00570
00571
/**
 * Converts the value submitted for a checkbox into a boolean.
 *
 * The value is compared in its string form, so the outcome does not depend
 * on the loose comparison rules of the running PHP version (where the
 * integer 0 can compare equal to "on").
 *
 * @param value The submitted value.
 * @return true when the value is "1" or "on" (this includes the integer 1
 * and boolean true), false for anything else, including null, arrays and
 * objects.
 */
function checkboxToBoolean( $value )
{
    // null, arrays and objects never represent a ticked checkbox
    if( !is_scalar( $value )) {
        return false;
    }

    $submitted = (string) $value;

    return ( $submitted === "1" || $submitted === "on" );
}
00582 }
00583 ?>