[ PHPXref.com ] [ Generated: Sun Jul 20 16:35:25 2008 ] [ bBlog 0.7.6 ]
[ Index ]     [ Variables ]     [ Functions ]     [ Classes ]     [ Constants ]     [ Statistics ]

title

Body

[close]

/bblog/bBlog_plugins/ -> modifier.kses.php (source)

   1  <?php
   2  
   3  # kses 0.2.1 - HTML/XHTML filter that only allows some elements and attributes
   4  # Copyright (C) 2002, 2003  Ulf Harnhammar
   5  #
   6  # This program is free software and open source software; you can redistribute
   7  # it and/or modify it under the terms of the GNU General Public License as
   8  # published by the Free Software Foundation; either version 2 of the License,
   9  # or (at your option) any later version.
  10  #
  11  # This program is distributed in the hope that it will be useful, but WITHOUT
  12  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  14  # more details.
  15  #
  16  # You should have received a copy of the GNU General Public License along
  17  # with this program; if not, write to the Free Software Foundation, Inc.,
  18  # 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  or visit
  19  # http://www.gnu.org/licenses/gpl.html
  20  #
  21  # *** CONTACT INFORMATION ***
  22  #
  23  # E-mail:      metaur at users dot sourceforge dot net
  24  # Web page:    http://sourceforge.net/projects/kses
  25  # Paper mail:  (not at the moment)
  26  #
  27  # [kses strips evil scripts!]
  28  
  29  function identify_modifier_kses () {
  30      return array (
  31      'name'           =>'kses',
  32      'type'             =>'smarty_modifier',
  33      'nicename'     =>'KSES XHTML Filter',
  34      'description'   =>'HTML/XHTML filter that only allows some elements and attributes',
  35      'authors'        =>'Ulf Harnhammar',
  36      'licence'         =>'GPL',
  37      'help'        => 'At the moment if you want to change the allowed html tags, you need to edit the file modifier.kses.php and edit the allowedtags array. Check the kses website for documentation.'
  38    );
  39  }
  40  
  41  function smarty_modifier_kses ($in,$allowedtags = FALSE) {
  42  
  43      if($allowedtags == 'nolinks') {
  44              /* need some way to specify a multi dimentional array via a smarty modifer paramater. e.g. {$var|kses:"a(href,title),b,i,blockquote(cite)"}. How to do that?
  45  
  46          $tags = array(explode(',',$allowedtags));
  47          $allowed_html = array();
  48          foreach($tags as $tag) {
  49                          $allowed_html[] = array($tag=>array());
  50          }
  51  
  52          .. for the mean time we'll just have a 'safe' list of things for unapproved comments
  53          */
  54          $allowed_html = array(
  55                  'b' => array(),
  56          'i' => array(),
  57          'strong' => array(),
  58          'code' => array(),
  59          'acronym' => array('title'),
  60          'abbr' => array('title'),
  61           'blockquote' => array('cite' => array())
  62          );
  63      } else {
  64  
  65          $allowed_html = array(
  66          'b' => array(),
  67          'i' => array(),
  68          'strong' => array(),
  69          'code' => array(),
  70          'acronym' => array('title'),
  71          'abbr' => array('title'),
  72                  'a' => array('href'  => array('maxlen' => 300),'title','rel' => array('minlen' => 3, 'maxlen' => 250)),
  73           'blockquote' => array('cite' => array())
  74          );
  75      }
  76  
  77      return kses($in,$allowed_html,array('http','https','ftp','mailto'));
  78  
  79  }
  80  
  81  function kses($string, $allowed_html, $allowed_protocols =
  82                 array('http', 'https', 'ftp', 'news', 'nntp', 'telnet',
  83                       'gopher', 'mailto'))
  84  ###############################################################################
  85  # This function makes sure that only the allowed HTML element names, attribute
  86  # names and attribute values plus only sane HTML entities will occur in
  87  # $string. You have to remove any slashes from PHP's magic quotes before you
  88  # call this function.
  89  ###############################################################################
  90  {
  91    $string = kses_no_null($string);
  92    $string = kses_js_entities($string);
  93    $string = kses_normalize_entities($string);
  94    $string = kses_hook($string);
  95    $allowed_html_fixed = kses_array_lc($allowed_html);
  96    return kses_split($string, $allowed_html_fixed, $allowed_protocols);
  97  } # function kses
  98  
  99  
 100  function kses_hook($string)
 101  ###############################################################################
 102  # You add any kses hooks here.
 103  ###############################################################################
 104  {
 105    return $string;
 106  } # function kses_hook
 107  
 108  
 109  function kses_version()
 110  ###############################################################################
 111  # This function returns kses' version number.
 112  ###############################################################################
 113  {
 114    return '0.2.1';
 115  } # function kses_version
 116  
 117  
 118  function kses_split($string, $allowed_html, $allowed_protocols)
 119  ###############################################################################
 120  # This function searches for HTML tags, no matter how malformed. It also
 121  # matches stray ">" characters.
 122  ###############################################################################
 123  {
 124    return preg_replace('%(<'.   # EITHER: <
 125                        '[^>]*'. # things that aren't >
 126                        '(>|$)'. # > or end of string
 127                        '|>)%e', # OR: just a >
 128                        "kses_split2('\\1', \$allowed_html, ".
 129                        '$allowed_protocols)',
 130                        $string);
 131  } # function kses_split
 132  
 133  
 134  function kses_split2($string, $allowed_html, $allowed_protocols)
 135  ###############################################################################
 136  # This function does a lot of work. It rejects some very malformed things
 137  # like <:::>. It returns an empty string, if the element isn't allowed (look
 138  # ma, no strip_tags()!). Otherwise it splits the tag into an element and an
 139  # attribute list.
 140  ###############################################################################
 141  {
 142    $string = kses_stripslashes($string);
 143  
 144    if (substr($string, 0, 1) != '<')
 145      return '&gt;';
 146      # It matched a ">" character
 147  
 148    if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches))
 149      return '';
 150      # It's seriously malformed
 151  
 152    $slash = trim($matches[1]);
 153    $elem = $matches[2];
 154    $attrlist = $matches[3];
 155  
 156    if (!is_array($allowed_html[strtolower($elem)]))
 157      return '';
 158      # They are using a not allowed HTML element
 159  
 160    return kses_attr("$slash$elem", $attrlist, $allowed_html,
 161                     $allowed_protocols);
 162  } # function kses_split2
 163  
 164  
 165  function kses_attr($element, $attr, $allowed_html, $allowed_protocols)
 166  ###############################################################################
 167  # This function removes all attributes, if none are allowed for this element.
 168  # If some are allowed it calls kses_hair() to split them further, and then it
 169  # builds up new HTML code from the data that kses_hair() returns. It also
 170  # removes "<" and ">" characters, if there are any left. One more thing it
 171  # does is to check if the tag has a closing XHTML slash, and if it does,
 172  # it puts one in the returned code as well.
 173  ###############################################################################
 174  {
 175  # Is there a closing XHTML slash at the end of the attributes?
 176  
 177    $xhtml_slash = '';
 178    if (preg_match('%\s/\s*$%', $attr))
 179      $xhtml_slash = ' /';
 180  
 181  # Are any attributes allowed at all for this element?
 182  
 183    if (count($allowed_html[strtolower($element)]) == 0)
 184      return "<$element$xhtml_slash>";
 185  
 186  # Split it
 187  
 188    $attrarr = kses_hair($attr, $allowed_protocols);
 189  
 190  # Go through $attrarr, and save the allowed attributes for this element
 191  # in $attr2
 192  
 193    $attr2 = '';
 194  
 195    foreach ($attrarr as $arreach)
 196    {
 197      $current = $allowed_html[strtolower($element)]
 198                              [strtolower($arreach['name'])];
 199      if ($current == '')
 200        continue; # the attribute is not allowed
 201  
 202      if (!is_array($current))
 203        $attr2 .= ' '.$arreach['whole'];
 204      # there are no checks
 205  
 206      else
 207      {
 208      # there are some checks
 209        $ok = true;
 210        foreach ($current as $currkey => $currval)
 211          if (!kses_check_attr_val($arreach['value'], $arreach['vless'],
 212                                   $currkey, $currval))
 213          { $ok = false; break; }
 214  
 215        if ($ok)
 216          $attr2 .= ' '.$arreach['whole']; # it passed them
 217      } # if !is_array($current)
 218    } # foreach
 219  
 220  # Remove any "<" or ">" characters
 221  
 222    $attr2 = preg_replace('/[<>]/', '', $attr2);
 223  
 224    return "<$element$attr2$xhtml_slash>";
 225  } # function kses_attr
 226  
 227  
 228  function kses_hair($attr, $allowed_protocols)
 229  ###############################################################################
 230  # This function does a lot of work. It parses an attribute list into an array
 231  # with attribute data, and tries to do the right thing even if it gets weird
 232  # input. It will add quotes around attribute values that don't have any quotes
 233  # or apostrophes around them, to make it easier to produce HTML code that will
 234  # conform to W3C's HTML specification. It will also remove bad URL protocols
 235  # from attribute values.
 236  ###############################################################################
 237  {
 238    $attrarr = array();
 239    $mode = 0;
 240    $attrname = '';
 241  
 242  # Loop through the whole attribute list
 243  
 244    while (strlen($attr) != 0)
 245    {
 246      $working = 0; # Was the last operation successful?
 247  
 248      switch ($mode)
 249      {
 250        case 0: # attribute name, href for instance
 251  
 252          if (preg_match('/^([-a-zA-Z]+)/', $attr, $match))
 253          {
 254            $attrname = $match[1];
 255            $working = $mode = 1;
 256            $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
 257          }
 258  
 259          break;
 260  
 261        case 1: # equals sign or valueless ("selected")
 262  
 263          if (preg_match('/^\s*=\s*/', $attr)) # equals sign
 264          {
 265            $working = 1; $mode = 2;
 266            $attr = preg_replace('/^\s*=\s*/', '', $attr);
 267            break;
 268          }
 269  
 270          if (preg_match('/^\s+/', $attr)) # valueless
 271          {
 272            $working = 1; $mode = 0;
 273            $attrarr[] = array
 274                          ('name'  => $attrname,
 275                           'value' => '',
 276                           'whole' => $attrname,
 277                           'vless' => 'y');
 278            $attr = preg_replace('/^\s+/', '', $attr);
 279          }
 280  
 281          break;
 282  
 283        case 2: # attribute value, a URL after href= for instance
 284  
 285          if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match))
 286           # "value"
 287          {
 288            $thisval = kses_bad_protocol($match[1], $allowed_protocols);
 289  
 290            $attrarr[] = array
 291                          ('name'  => $attrname,
 292                           'value' => $thisval,
 293                           'whole' => "$attrname=\"$thisval\"",
 294                           'vless' => 'n');
 295            $working = 1; $mode = 0;
 296            $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
 297            break;
 298          }
 299  
 300          if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match))
 301           # 'value'
 302          {
 303            $thisval = kses_bad_protocol($match[1], $allowed_protocols);
 304  
 305            $attrarr[] = array
 306                          ('name'  => $attrname,
 307                           'value' => $thisval,
 308                           'whole' => "$attrname='$thisval'",
 309                           'vless' => 'n');
 310            $working = 1; $mode = 0;
 311            $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
 312            break;
 313          }
 314  
 315          if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match))
 316           # value
 317          {
 318            $thisval = kses_bad_protocol($match[1], $allowed_protocols);
 319  
 320            $attrarr[] = array
 321                          ('name'  => $attrname,
 322                           'value' => $thisval,
 323                           'whole' => "$attrname=\"$thisval\"",
 324                           'vless' => 'n');
 325                           # We add quotes to conform to W3C's HTML spec.
 326            $working = 1; $mode = 0;
 327            $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
 328          }
 329  
 330          break;
 331      } # switch
 332  
 333      if ($working == 0) # not well formed, remove and try again
 334      {
 335        $attr = kses_html_error($attr);
 336        $mode = 0;
 337      }
 338    } # while
 339  
 340    if ($mode == 1)
 341    # special case, for when the attribute list ends with a valueless
 342    # attribute like "selected"
 343      $attrarr[] = array
 344                    ('name'  => $attrname,
 345                     'value' => '',
 346                     'whole' => $attrname,
 347                     'vless' => 'y');
 348  
 349    return $attrarr;
 350  } # function kses_hair
 351  
 352  
 353  function kses_check_attr_val($value, $vless, $checkname, $checkvalue)
 354  ###############################################################################
 355  # This function performs different checks for attribute values. The currently
 356  # implemented checks are "maxlen", "minlen", "maxval", "minval" and "valueless"
 357  # with even more checks to come soon.
 358  ###############################################################################
 359  {
 360    $ok = true;
 361  
 362    switch (strtolower($checkname))
 363    {
 364      case 'maxlen':
 365      # The maxlen check makes sure that the attribute value has a length not
 366      # greater than the given value. This can be used to avoid Buffer Overflows
 367      # in WWW clients and various Internet servers.
 368  
 369        if (strlen($value) > $checkvalue)
 370          $ok = false;
 371        break;
 372  
 373      case 'minlen':
 374      # The minlen check makes sure that the attribute value has a length not
 375      # smaller than the given value.
 376  
 377        if (strlen($value) < $checkvalue)
 378          $ok = false;
 379        break;
 380  
 381      case 'maxval':
 382      # The maxval check does two things: it checks that the attribute value is
 383      # an integer from 0 and up, without an excessive amount of zeroes or
 384      # whitespace (to avoid Buffer Overflows). It also checks that the attribute
 385      # value is not greater than the given value.
 386      # This check can be used to avoid Denial of Service attacks.
 387  
 388        if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
 389          $ok = false;
 390        if ($value > $checkvalue)
 391          $ok = false;
 392        break;
 393  
 394      case 'minval':
 395      # The minval check checks that the attribute value is a positive integer,
 396      # and that it is not smaller than the given value.
 397  
 398        if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
 399          $ok = false;
 400        if ($value < $checkvalue)
 401          $ok = false;
 402        break;
 403  
 404      case 'valueless':
 405      # The valueless check checks if the attribute has a value
 406      # (like <a href="blah">) or not (<option selected>). If the given value
 407      # is a "y" or a "Y", the attribute must not have a value.
 408      # If the given value is an "n" or an "N", the attribute must have one.
 409  
 410        if (strtolower($checkvalue) != $vless)
 411          $ok = false;
 412        break;
 413    } # switch
 414  
 415    return $ok;
 416  } # function kses_check_attr_val
 417  
 418  
 419  function kses_bad_protocol($string, $allowed_protocols)
 420  ###############################################################################
 421  # This function removes all non-allowed protocols from the beginning of
 422  # $string. It ignores whitespace and the case of the letters, and it does
 423  # understand HTML entities. It does its work in a while loop, so it won't be
 424  # fooled by a string like "javascript:javascript:alert(57)".
 425  ###############################################################################
 426  {
 427    $string = kses_no_null($string);
 428    $string2 = $string.'a';
 429  
 430    while ($string != $string2)
 431    {
 432      $string2 = $string;
 433      $string = kses_bad_protocol_once($string, $allowed_protocols);
 434    } # while
 435  
 436    return $string;
 437  } # function kses_bad_protocol
 438  
 439  
 440  function kses_no_null($string)
 441  ###############################################################################
 442  # This function removes any NULL or chr(173) characters in $string.
 443  ###############################################################################
 444  {
 445    $string = preg_replace('/\0+/', '', $string);
 446    $string = preg_replace('/(\\\\0)+/', '', $string);
 447  
 448    $string = preg_replace('/\xad+/', '', $string); # deals with Opera "feature"
 449  
 450    return $string;
 451  } # function kses_no_null
 452  
 453  
 454  function kses_stripslashes($string)
 455  ###############################################################################
 456  # This function changes the character sequence  \"  to just  "
 457  # It leaves all other slashes alone. It's really weird, but the quoting from
 458  # preg_replace(//e) seems to require this.
 459  ###############################################################################
 460  {
 461    return preg_replace('%\\\\"%', '"', $string);
 462  } # function kses_stripslashes
 463  
 464  
 465  function kses_array_lc($inarray)
 466  ###############################################################################
 467  # This function goes through an array, and changes the keys to all lower case.
 468  ###############################################################################
 469  {
 470    $outarray = array();
 471  
 472    foreach ($inarray as $inkey => $inval)
 473    {
 474      $outkey = strtolower($inkey);
 475      $outarray[$outkey] = array();
 476  
 477      foreach ($inval as $inkey2 => $inval2)
 478      {
 479        $outkey2 = strtolower($inkey2);
 480        $outarray[$outkey][$outkey2] = $inval2;
 481      } # foreach $inval
 482    } # foreach $inarray
 483  
 484    return $outarray;
 485  } # function kses_array_lc
 486  
 487  
 488  function kses_js_entities($string)
 489  ###############################################################################
 490  # This function removes the HTML JavaScript entities found in early versions of
 491  # Netscape 4.
 492  ###############################################################################
 493  {
 494    return preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
 495  } # function kses_js_entities
 496  
 497  
 498  function kses_html_error($string)
 499  ###############################################################################
 500  # This function deals with parsing errors in kses_hair(). The general plan is
 501  # to remove everything to and including some whitespace, but it deals with
 502  # quotes and apostrophes as well.
 503  ###############################################################################
 504  {
 505    return preg_replace('/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $string);
 506  } # function kses_html_error
 507  
 508  
 509  function kses_bad_protocol_once($string, $allowed_protocols)
 510  ###############################################################################
 511  # This function searches for URL protocols at the beginning of $string, while
 512  # handling whitespace and HTML entities.
 513  ###############################################################################
 514  {
 515    return preg_replace('/^((&[^;]*;|[\sA-Za-z0-9])*)'.
 516                        '(:|&#58;|&#[Xx]3[Aa];)\s*/e',
 517                        'kses_bad_protocol_once2("\\1", $allowed_protocols)',
 518                        $string);
 519  } # function kses_bad_protocol_once
 520  
 521  
 522  function kses_bad_protocol_once2($string, $allowed_protocols)
 523  ###############################################################################
 524  # This function processes URL protocols, checks to see if they're in the white-
 525  # list or not, and returns different data depending on the answer.
 526  ###############################################################################
 527  {
 528    $string2 = kses_decode_entities($string);
 529    $string2 = preg_replace('/\s/', '', $string2);
 530    $string2 = kses_no_null($string2);
 531    $string2 = strtolower($string2);
 532  
 533    $allowed = false;
 534    foreach ($allowed_protocols as $one_protocol)
 535      if (strtolower($one_protocol) == $string2)
 536      {
 537        $allowed = true;
 538        break;
 539      }
 540  
 541    if ($allowed)
 542      return "$string2:";
 543    else
 544      return '';
 545  } # function kses_bad_protocol_once2
 546  
 547  
 548  function kses_normalize_entities($string)
 549  ###############################################################################
 550  # This function normalizes HTML entities. It will convert "AT&T" to the correct
 551  # "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.
 552  ###############################################################################
 553  {
 554  # Disarm all entities by converting & to &amp;
 555  
 556    $string = str_replace('&', '&amp;', $string);
 557  
 558  # Change back the allowed entities in our entity whitelist
 559  
 560    $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/',
 561                           '&\\1;', $string);
 562    $string = preg_replace('/&amp;#0*([0-9]{1,5});/e',
 563                           'kses_normalize_entities2("\\1")', $string);
 564    $string = preg_replace('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/',
 565                           '&#\\1\\2;', $string);
 566  
 567    return $string;
 568  } # function kses_normalize_entities
 569  
 570  
 571  function kses_normalize_entities2($i)
 572  ###############################################################################
 573  # This function helps kses_normalize_entities() to only accept 16 bit values
 574  # and nothing more for &#number; entities.
 575  ###############################################################################
 576  {
 577    return (($i > 65535) ? "&amp;#$i;" : "&#$i;");
 578  } # function kses_normalize_entities2
 579  
 580  
 581  function kses_decode_entities($string)
 582  ###############################################################################
 583  # This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't
 584  # do anything with other entities like &auml;, but we don't need them in the
 585  # URL protocol whitelisting system anyway.
 586  ###############################################################################
 587  {
 588    $string = preg_replace('/&#([0-9]+);/e', 'chr("\\1")', $string);
 589    $string = preg_replace('/&#[Xx]([0-9A-Fa-f]+);/e', 'chr(hexdec("\\1"))',
 590                           $string);
 591  
 592    return $string;
 593  } # function kses_decode_entities
 594  
 595  ?>


[ Powered by PHPXref - Served by Debian GNU/Linux ]