| [ PHPXref.com ] | [ Generated: Sun Jul 20 16:35:25 2008 ] | [ bBlog 0.7.6 ] |
| [ Index ] [ Variables ] [ Functions ] [ Classes ] [ Constants ] [ Statistics ] | ||
[Summary view] [Print] [Text view]
<?php

# kses 0.2.1 - HTML/XHTML filter that only allows some elements and attributes
# Copyright (C) 2002, 2003  Ulf Harnhammar
#
# This program is free software and open source software; you can redistribute
# it and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the License,
# or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  or visit
# http://www.gnu.org/licenses/gpl.html
#
# *** CONTACT INFORMATION ***
#
# E-mail:      metaur at users dot sourceforge dot net
# Web page:    http://sourceforge.net/projects/kses
# Paper mail:  (not at the moment)
#
# [kses strips evil scripts!]
#
# Maintenance notes for this copy:
#  * preg_replace() with the /e modifier was removed in PHP 7.0, so every
#    evaluated replacement now goes through preg_replace_callback().
#  * Entity literals (&amp;, &#58;, &gt;) are spelled out explicitly; the
#    filter silently stops escaping anything if they are ever HTML-decoded.


function identify_modifier_kses ()
###############################################################################
# Plugin descriptor: returns the metadata array that describes this Smarty
# modifier (name, type, display name, description, authors, licence, help).
###############################################################################
{
  return array (
    'name'        => 'kses',
    'type'        => 'smarty_modifier',
    'nicename'    => 'KSES XHTML Filter',
    'description' => 'HTML/XHTML filter that only allows some elements and attributes',
    'authors'     => 'Ulf Harnhammar',
    'licence'     => 'GPL',
    'help'        => 'At the moment if you want to change the allowed html tags, you need to edit the file modifier.kses.php and edit the allowedtags array. Check the kses website for documentation.'
  );
} # function identify_modifier_kses


function smarty_modifier_kses ($in, $allowedtags = FALSE)
###############################################################################
# Smarty modifier entry point. Filters $in through kses() with a fixed
# whitelist. Passing the string 'nolinks' as $allowedtags selects the reduced
# list without <a> (meant for unapproved comments); anything else selects the
# full list. Only http, https, ftp and mailto URLs are let through.
#
# TODO: find a way to specify the whitelist from the template, e.g.
#       {$var|kses:"a(href,title),b,i,blockquote(cite)"}
###############################################################################
{
  # Attribute names must be array KEYS. The previous array('title') form
  # produced the entry 0 => 'title', which never matches an attribute name,
  # so "title" was always stripped although it was meant to be allowed.
  $allowed_html = array(
    'b'          => array(),
    'i'          => array(),
    'strong'     => array(),
    'code'       => array(),
    'acronym'    => array('title' => array()),
    'abbr'       => array('title' => array()),
    'blockquote' => array('cite' => array())
  );

  # Strict comparison: with == the boolean TRUE (and, before PHP 8, the
  # integer 0) would also have been treated as 'nolinks'.
  if ($allowedtags !== 'nolinks')
    $allowed_html['a'] = array(
      'href'  => array('maxlen' => 300),
      'title' => array(),
      'rel'   => array('minlen' => 3, 'maxlen' => 250)
    );

  return kses($in, $allowed_html, array('http', 'https', 'ftp', 'mailto'));
} # function smarty_modifier_kses


function kses($string, $allowed_html, $allowed_protocols =
               array('http', 'https', 'ftp', 'news', 'nntp', 'telnet',
                     'gopher', 'mailto'))
###############################################################################
# This function makes sure that only the allowed HTML element names, attribute
# names and attribute values plus only sane HTML entities will occur in
# $string. You have to remove any slashes from PHP's magic quotes before you
# call this function.
#
# $allowed_html maps element name => array(attribute name => checks), where
# checks is an array of check name => check value (see kses_check_attr_val())
# or an empty array for "no checks".
###############################################################################
{
  $string = kses_no_null($string);
  $string = kses_js_entities($string);
  $string = kses_normalize_entities($string);
  $string = kses_hook($string);
  $allowed_html_fixed = kses_array_lc($allowed_html);
  return kses_split($string, $allowed_html_fixed, $allowed_protocols);
} # function kses


function kses_hook($string)
###############################################################################
# You add any kses hooks here.
###############################################################################
{
  return $string;
} # function kses_hook


function kses_version()
###############################################################################
# This function returns kses' version number.
###############################################################################
{
  return '0.2.1';
} # function kses_version


function kses_split($string, $allowed_html, $allowed_protocols)
###############################################################################
# This function searches for HTML tags, no matter how malformed. It also
# matches stray ">" characters. Every match is handed to kses_split2() and
# replaced by whatever that function returns.
###############################################################################
{
  return preg_replace_callback(
           '%(<'.     # EITHER: <
           '[^>]*'.   # things that aren't >
           '(>|$)'.   # > or end of string
           '|>)%',    # OR: just a >
           function ($match) use ($allowed_html, $allowed_protocols)
           {
             return kses_split2($match[1], $allowed_html, $allowed_protocols);
           },
           $string);
} # function kses_split


function kses_split2($string, $allowed_html, $allowed_protocols)
###############################################################################
# This function does a lot of work. It rejects some very malformed things
# like <:::>. It returns an empty string, if the element isn't allowed (look
# ma, no strip_tags()!). Otherwise it splits the tag into an element and an
# attribute list.
#
# $string is the raw match from kses_split(). It is no longer passed through
# kses_stripslashes(): that call only undid the quoting added by
# preg_replace(//e) and would now corrupt a literal \" in the input.
###############################################################################
{
  if (substr($string, 0, 1) != '<')
    return '&gt;';
    # It matched a ">" character

  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches))
    return '';
    # It's seriously malformed

  $slash = trim($matches[1]);
  $elem = $matches[2];
  $attrlist = $matches[3];

  $key = strtolower($elem);
  if (!isset($allowed_html[$key]) || !is_array($allowed_html[$key]))
    return '';
    # They are using a not allowed HTML element

  return kses_attr("$slash$elem", $attrlist, $allowed_html,
                   $allowed_protocols);
} # function kses_split2


function kses_attr($element, $attr, $allowed_html, $allowed_protocols)
###############################################################################
# This function removes all attributes, if none are allowed for this element.
# If some are allowed it calls kses_hair() to split them further, and then it
# builds up new HTML code from the data that kses_hair() returns. It also
# removes "<" and ">" characters, if there are any left. One more thing it
# does is to check if the tag has a closing XHTML slash, and if it does,
# it puts one in the returned code as well.
#
# $element may carry a leading "/" (closing tag). Such a name is never a key
# of $allowed_html, so closing tags always come back without attributes.
###############################################################################
{
# Is there a closing XHTML slash at the end of the attributes?

  $xhtml_slash = '';
  if (preg_match('%\s/\s*$%', $attr))
    $xhtml_slash = ' /';

# Are any attributes allowed at all for this element?
# A missing key is treated as "none": count(null) is a TypeError on PHP 8.

  $key = strtolower($element);
  $allowed_attr = array();
  if (isset($allowed_html[$key]) && is_array($allowed_html[$key]))
    $allowed_attr = $allowed_html[$key];

  if (count($allowed_attr) == 0)
    return "<$element$xhtml_slash>";

# Split it

  $attrarr = kses_hair($attr, $allowed_protocols);

# Go through $attrarr, and save the allowed attributes for this element
# in $attr2

  $attr2 = '';

  foreach ($attrarr as $arreach)
  {
    $name = strtolower($arreach['name']);
    $current = isset($allowed_attr[$name]) ? $allowed_attr[$name] : '';

    if ($current === '' || $current === false)
      continue; # the attribute is not allowed

    if (!is_array($current))
    {
      $attr2 .= ' '.$arreach['whole'];
      continue; # there are no checks
    }

    # there are some checks; every one of them has to pass
    $ok = true;
    foreach ($current as $currkey => $currval)
      if (!kses_check_attr_val($arreach['value'], $arreach['vless'],
                               $currkey, $currval))
      { $ok = false; break; }

    if ($ok)
      $attr2 .= ' '.$arreach['whole']; # it passed them
  } # foreach

# Remove any "<" or ">" characters

  $attr2 = preg_replace('/[<>]/', '', $attr2);

  return "<$element$attr2$xhtml_slash>";
} # function kses_attr


function kses_hair($attr, $allowed_protocols)
###############################################################################
# This function does a lot of work. It parses an attribute list into an array
# with attribute data, and tries to do the right thing even if it gets weird
# input. It will add quotes around attribute values that don't have any quotes
# or apostrophes around them, to make it easier to produce HTML code that will
# conform to W3C's HTML specification. It will also remove bad URL protocols
# from attribute values.
#
# Returns a list of arrays with the keys 'name', 'value', 'whole' (the
# rebuilt name=value text) and 'vless' ('y' for valueless attributes, else
# 'n').
###############################################################################
{
  $attrarr = array();
  $mode = 0;
  $attrname = '';

  # The three accepted value notations and the quote character used when the
  # attribute is written back out.
  $value_forms = array(
    array('/^"([^"]*)"(\s+|$)/', '"'),       # "value"
    array("/^'([^']*)'(\s+|$)/", "'"),       # 'value'
    array("%^([^\s\"']+)(\s+|$)%", '"')      # value - we add quotes to
                                             # conform to W3C's HTML spec.
  );

# Loop through the whole attribute list

  while (strlen($attr) != 0)
  {
    $working = 0; # Was the last operation successful?

    switch ($mode)
    {
      case 0: # attribute name, href for instance

        if (preg_match('/^([-a-zA-Z]+)/', $attr, $match))
        {
          $attrname = $match[1];
          $working = $mode = 1;
          $attr = (string) substr($attr, strlen($match[0]));
        }

        break;

      case 1: # equals sign or valueless ("selected")

        if (preg_match('/^\s*=\s*/', $attr, $match)) # equals sign
        {
          $working = 1; $mode = 2;
          $attr = (string) substr($attr, strlen($match[0]));
          break;
        }

        if (preg_match('/^\s+/', $attr, $match)) # valueless
        {
          $working = 1; $mode = 0;
          $attrarr[] = array
                        ('name'  => $attrname,
                         'value' => '',
                         'whole' => $attrname,
                         'vless' => 'y');
          $attr = (string) substr($attr, strlen($match[0]));
        }

        break;

      case 2: # attribute value, a URL after href= for instance

        foreach ($value_forms as $form)
        {
          if (!preg_match($form[0], $attr, $match))
            continue;

          $quote = $form[1];
          $thisval = kses_bad_protocol($match[1], $allowed_protocols);

          $attrarr[] = array
                        ('name'  => $attrname,
                         'value' => $thisval,
                         'whole' => $attrname.'='.$quote.$thisval.$quote,
                         'vless' => 'n');
          $working = 1; $mode = 0;
          $attr = (string) substr($attr, strlen($match[0]));
          break; # leaves the foreach only
        }

        break;
    } # switch

    if ($working == 0) # not well formed, remove and try again
    {
      $attr = kses_html_error($attr);
      $mode = 0;
    }
  } # while

  if ($mode == 1)
  # special case, for when the attribute list ends with a valueless
  # attribute like "selected"
    $attrarr[] = array
                  ('name'  => $attrname,
                   'value' => '',
                   'whole' => $attrname,
                   'vless' => 'y');

  return $attrarr;
} # function kses_hair


function kses_check_attr_val($value, $vless, $checkname, $checkvalue)
###############################################################################
# This function performs different checks for attribute values. The currently
# implemented checks are "maxlen", "minlen", "maxval", "minval" and "valueless"
# with even more checks to come soon. Unknown check names always pass.
###############################################################################
{
  $ok = true;

  switch (strtolower($checkname))
  {
    case 'maxlen':
    # The maxlen check makes sure that the attribute value has a length not
    # greater than the given value. This can be used to avoid Buffer Overflows
    # in WWW clients and various Internet servers.

      if (strlen($value) > $checkvalue)
        $ok = false;
      break;

    case 'minlen':
    # The minlen check makes sure that the attribute value has a length not
    # smaller than the given value.

      if (strlen($value) < $checkvalue)
        $ok = false;
      break;

    case 'maxval':
    # The maxval check does two things: it checks that the attribute value is
    # an integer from 0 and up, without an excessive amount of zeroes or
    # whitespace (to avoid Buffer Overflows). It also checks that the attribute
    # value is not greater than the given value.
    # This check can be used to avoid Denial of Service attacks.

      if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
        $ok = false;
      if ($value > $checkvalue)
        $ok = false;
      break;

    case 'minval':
    # The minval check checks that the attribute value is a positive integer,
    # and that it is not smaller than the given value.

      if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
        $ok = false;
      if ($value < $checkvalue)
        $ok = false;
      break;

    case 'valueless':
    # The valueless check checks if the attribute has a value
    # (like <a href="blah">) or not (<option selected>). If the given value
    # is a "y" or a "Y", the attribute must not have a value.
    # If the given value is an "n" or an "N", the attribute must have one.

      if (strtolower($checkvalue) != $vless)
        $ok = false;
      break;
  } # switch

  return $ok;
} # function kses_check_attr_val


function kses_bad_protocol($string, $allowed_protocols)
###############################################################################
# This function removes all non-allowed protocols from the beginning of
# $string. It ignores whitespace and the case of the letters, and it does
# understand HTML entities. It does its work in a while loop, so it won't be
# fooled by a string like "javascript:javascript:alert(57)".
###############################################################################
{
  $string = kses_no_null($string);

  # Keep stripping until a pass leaves the string unchanged.
  do
  {
    $before = $string;
    $string = kses_bad_protocol_once($string, $allowed_protocols);
  } while ($string != $before);

  return $string;
} # function kses_bad_protocol


function kses_no_null($string)
###############################################################################
# This function removes any NULL or chr(173) characters in $string.
#
# NOTE(review): chr(173) is removed byte-wise. If callers pass UTF-8 text this
# may also cut the 0xAD byte out of multi-byte characters - confirm the
# charset this filter is used with.
###############################################################################
{
  $string = preg_replace('/\0+/', '', $string);
  $string = preg_replace('/(\\\\0)+/', '', $string);

  $string = preg_replace('/\xad+/', '', $string); # deals with Opera "feature"

  return $string;
} # function kses_no_null


function kses_stripslashes($string)
###############################################################################
# This function changes the character sequence  \"  to just  "
# It leaves all other slashes alone. It was needed to undo the quoting from
# preg_replace(//e); the filter itself no longer calls it, but it is kept so
# that external callers continue to work.
###############################################################################
{
  return preg_replace('%\\\\"%', '"', $string);
} # function kses_stripslashes


function kses_array_lc($inarray)
###############################################################################
# This function goes through an array, and changes the keys to all lower case.
# Both levels are handled: element names and attribute names.
###############################################################################
{
  $outarray = array();

  foreach ($inarray as $inkey => $inval)
  {
    $outkey = strtolower($inkey);
    $outarray[$outkey] = array();

    foreach ($inval as $inkey2 => $inval2)
    {
      $outkey2 = strtolower($inkey2);
      $outarray[$outkey][$outkey2] = $inval2;
    } # foreach $inval
  } # foreach $inarray

  return $outarray;
} # function kses_array_lc


function kses_js_entities($string)
###############################################################################
# This function removes the HTML JavaScript entities found in early versions of
# Netscape 4.
###############################################################################
{
  return preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
} # function kses_js_entities


function kses_html_error($string)
###############################################################################
# This function deals with parsing errors in kses_hair(). The general plan is
# to remove everything to and including some whitespace, but it deals with
# quotes and apostrophes as well.
###############################################################################
{
  return preg_replace('/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $string);
} # function kses_html_error


function kses_bad_protocol_once($string, $allowed_protocols)
###############################################################################
# This function searches for URL protocols at the beginning of $string, while
# handling whitespace and HTML entities. The colon may be written literally
# or as the entity &#58; / &#x3a;.
###############################################################################
{
  return preg_replace_callback(
           '/^((&[^;]*;|[\sA-Za-z0-9])*)'.
           '(:|&#58;|&#[Xx]3[Aa];)\s*/',
           function ($match) use ($allowed_protocols)
           {
             return kses_bad_protocol_once2($match[1], $allowed_protocols);
           },
           $string);
} # function kses_bad_protocol_once


function kses_bad_protocol_once2($string, $allowed_protocols)
###############################################################################
# This function processes URL protocols, checks to see if they're in the white-
# list or not, and returns different data depending on the answer: the
# normalized "protocol:" prefix if it is allowed, otherwise an empty string.
###############################################################################
{
  $string2 = kses_decode_entities($string);
  $string2 = preg_replace('/\s/', '', $string2);
  $string2 = kses_no_null($string2);
  $string2 = strtolower($string2);

  foreach ($allowed_protocols as $one_protocol)
    if (strtolower($one_protocol) == $string2)
      return "$string2:";

  return '';
} # function kses_bad_protocol_once2


function kses_normalize_entities($string)
###############################################################################
# This function normalizes HTML entities. It will convert "AT&T" to the correct
# "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.
###############################################################################
{
# Disarm all entities by converting & to &amp;

  $string = str_replace('&', '&amp;', $string);

# Change back the allowed entities in our entity whitelist

  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/',
                         '&\\1;', $string);
  $string = preg_replace_callback('/&amp;#0*([0-9]{1,5});/',
                                  function ($match)
                                  {
                                    return kses_normalize_entities2($match[1]);
                                  },
                                  $string);
  $string = preg_replace('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/',
                         '&#\\1\\2;', $string);

  return $string;
} # function kses_normalize_entities


function kses_normalize_entities2($i)
###############################################################################
# This function helps kses_normalize_entities() to only accept 16 bit values
# and nothing more for &#number; entities. Larger values stay disarmed.
###############################################################################
{
  return (((int) $i > 65535) ? "&amp;#$i;" : "&#$i;");
} # function kses_normalize_entities2


function kses_decode_entities($string)
###############################################################################
# This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't
# do anything with other entities like &auml;, but we don't need them in the
# URL protocol whitelisting system anyway.
###############################################################################
{
  $string = preg_replace_callback('/&#([0-9]+);/',
                                  function ($match)
                                  {
                                    return chr((int) $match[1]);
                                  },
                                  $string);

  # chr() only looks at the low byte, which is the last two hex digits;
  # cutting the string first keeps hexdec() from overflowing into a float.
  $string = preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+);/',
                                  function ($match)
                                  {
                                    return chr((int) hexdec(substr($match[1], -2)));
                                  },
                                  $string);

  return $string;
} # function kses_decode_entities

?>
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| [ Powered by PHPXref - Served by Debian GNU/Linux ] |