PHP Cross Reference - Textpattern - Source: /textpattern/vendors/Netcarver/Textile/Parser.php

Source: /textpattern/vendors/Netcarver/Textile/Parser.php - 4042 lines - 122766 bytes - Summary - Text - Print

Description: Textile - A Humane Web Text Generator.
   1  <?php
   2  
   3  /**
   4   * Textile - A Humane Web Text Generator.
   5   *
   6   * @link https://github.com/textile/php-textile
   7   */
   8  
   9  namespace Netcarver\Textile;
  10  
  11  /*
  12   * Textile - A Humane Web Text Generator
  13   *
  14   * Copyright (c) 2003-2004, Dean Allen <dean@textism.com>
  15   * All rights reserved.
  16   *
  17   * Thanks to Carlo Zottmann <carlo@g-blog.net> for refactoring
  18   * Textile's procedural code into a class framework
  19   *
  20   * Additions and fixes Copyright (c) 2006    Alex Shiels       https://twitter.com/tellyworth
  21   * Additions and fixes Copyright (c) 2010    Stef Dawson       http://stefdawson.com/
  22   * Additions and fixes Copyright (c) 2010-13 Netcarver         https://github.com/netcarver
  23   * Additions and fixes Copyright (c) 2011    Jeff Soo          http://ipsedixit.net/
  24   * Additions and fixes Copyright (c) 2012    Robert Wetzlmayr  http://wetzlmayr.com/
  25   * Additions and fixes Copyright (c) 2012-13 Jukka Svahn       http://rahforum.biz/
  26   *
  27   * Redistribution and use in source and binary forms, with or without
  28   * modification, are permitted provided that the following conditions are met:
  29   *
  30   * * Redistributions of source code must retain the above copyright notice,
  31   * this list of conditions and the following disclaimer.
  32   *
  33   * * Redistributions in binary form must reproduce the above copyright notice,
  34   * this list of conditions and the following disclaimer in the documentation
  35   * and/or other materials provided with the distribution.
  36   *
  37   * * Neither the name Textile nor the names of its contributors may be used to
  38   * endorse or promote products derived from this software without specific
  39   * prior written permission.
  40   *
  41   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  42   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  43   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  44   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  45   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  46   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  47   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  48   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  49   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  50   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  51   * POSSIBILITY OF SUCH DAMAGE.
  52   */
  53  
  54  /*
  55  Textile usage examples.
  56  
  57  Block modifier syntax:
  58  
  59      Header: h(1-6).
  60      Paragraphs beginning with 'hn. ' (where n is 1-6) are wrapped in header tags.
  61      Example: h1. Header... -> <h1>Header...</h1>
  62  
  63      Paragraph: p. (also applied by default)
  64      Example: p. Text -> <p>Text</p>
  65  
  66      Blockquote: bq.
  67      Example: bq. Block quotation... -> <blockquote>Block quotation...</blockquote>
  68  
  69      Blockquote with citation: bq.:http://citation.url
  70      Example: bq.:http://textism.com/ Text...
  71      ->    <blockquote cite="http://textism.com/">Text...</blockquote>
  72  
  73      Footnote: fn(1-100).
  74      Example: fn1. Footnote... -> <p id="fn1">Footnote...</p>
  75  
  76      Numeric list: #, ##
  77      Consecutive paragraphs beginning with # are wrapped in ordered list tags.
  78      Example: <ol><li>ordered list</li></ol>
  79  
  80      Bulleted list: *, **
  81      Consecutive paragraphs beginning with * are wrapped in unordered list tags.
  82      Example: <ul><li>unordered list</li></ul>
  83  
  84      Definition list:
  85          Terms ;, ;;
  86          Definitions :, ::
  87      Consecutive paragraphs beginning with ; or : are wrapped in definition list tags.
  88      Example: <dl><dt>term</dt><dd>definition</dd></dl>
  89  
  90      Redcloth-style Definition list:
  91          - Term1 := Definition1
  92          - Term2 := Extended
  93            definition =:
  94  
  95  Phrase modifier syntax:
  96  
  97             _emphasis_    ->     <em>emphasis</em>
  98             __italic__    ->     <i>italic</i>
  99               *strong*    ->     <strong>strong</strong>
 100               **bold**    ->     <b>bold</b>
 101           ??citation??    ->     <cite>citation</cite>
 102         -deleted text-    ->     <del>deleted</del>
 103        +inserted text+    ->     <ins>inserted</ins>
 104          ^superscript^    ->     <sup>superscript</sup>
 105            ~subscript~    ->     <sub>subscript</sub>
 106                 @code@    ->     <code>computer code</code>
 107            %(bob)span%    ->     <span class="bob">span</span>
 108  
 109          ==notextile==    ->     leave text alone (do not format)
 110  
 111         "linktext":url    ->     <a href="url">linktext</a>
 112  "linktext(title)":url    ->     <a href="url" title="title">linktext</a>
 113                "$":url    ->     <a href="url">url</a>
 114         "$(title)":url    ->     <a href="url" title="title">url</a>
 115  
 116             !imageurl!    ->     <img src="imageurl" />
 117   !imageurl(alt text)!    ->     <img src="imageurl" alt="alt text" />
 118     !imageurl!:linkurl    ->     <a href="linkurl"><img src="imageurl" /></a>
 119  
 120  ABC(Always Be Closing)   ->     <acronym title="Always Be Closing">ABC</acronym>
 121  
 122  Linked Notes:
 123  
 124      Allows the generation of an automated list of notes with links.
 125  
 126      Linked notes are composed of three parts, a set of named _definitions_, a set of
 127      _references_ to those definitions and one or more _placeholders_ indicating where
 128      the consolidated list of notes is to be placed in your document.
 129  
 130      Definitions:
 131  
 132      Each note definition must occur in its own paragraph and should look like this...
 133  
 134      note#mynotelabel. Your definition text here.
 135  
 136      You are free to use whatever label you wish after the # as long as it is made up
 137      of letters, numbers, colon(:) or dash(-).
 138  
 139      References:
 140  
 141      Each note reference is marked in your text like this[#mynotelabel] and
 142      it will be replaced with a superscript reference that links into the list of
 143      note definitions.
 144  
 145      List placeholder(s):
 146  
 147      The note list can go anywhere in your document. You have to indicate where
 148      like this:
 149  
 150      notelist.
 151  
 152      notelist can take attributes (class#id) like this: notelist(class#id).
 153  
 154      By default, the note list will show each definition in the order that they
 155      are referenced in the text by the _references_. It will show each definition with
 156      a full list of backlinks to each reference. If you do not want this, you can choose
 157      to override the backlinks like this...
 158  
 159      notelist(class#id)!.    Produces a list with no backlinks.
 160      notelist(class#id)^.    Produces a list with only the first backlink.
 161  
 162      Should you wish to have a specific definition display backlinks differently to this
 163      then you can override the backlink method by appending a link override to the
 164      _definition_ you wish to customise.
 165  
 166      note#label.    Uses the citelist's setting for backlinks.
 167      note#label!.   Causes that definition to have no backlinks.
 168      note#label^.   Causes that definition to have one backlink (to the first ref.)
 169      note#label*.   Causes that definition to have all backlinks.
 170  
 171      Any unreferenced notes will be left out of the list unless you explicitly state
 172      you want them by adding a '+'. Like this...
 173  
 174      notelist(class#id)!+. Giving a list of all notes without any backlinks.
 175  
 176      You can mix and match the list backlink control and unreferenced links controls
 177      but the backlink control (if any) must go first. Like so: notelist^+. , not
 178      like this: notelist+^.
 179  
 180      Example...
 181          Scientists say[#lavader] the moon is small.
 182  
 183          note#other. An unreferenced note.
 184  
 185          note#lavader(myliclass). "Proof":http://example.com of a small moon.
 186  
 187          notelist(myclass#myid)+.
 188  
 189          Would output (the actual IDs used would be randomised)...
 190  
 191          <p>Scientists say<sup><a href="#note1" id="noteref1">1</a></sup> the moon is small.</p>
 192  
 193          <ol class="myclass" id="myid">
 194              <li class="myliclass"><a href="#noteref1"><sup>a</sup></a>
 195                  <span id="note1"> </span><a href="http://example.com">Proof</a> of a small moon.</li>
 196              <li>An unreferenced note.</li>
 197          </ol>
 198  
 199          The 'a b c' backlink characters can be altered too.
 200          For example if you wanted the notes to have numeric backlinks starting from 1:
 201  
 202          notelist:1.
 203  
 204  Table syntax:
 205  
 206      Simple tables:
 207  
 208          |a|simple|table|row|
 209          |And|Another|table|row|
 210          |With an||empty|cell|
 211  
 212          |=. My table caption goes here
 213          |_. A|_. table|_. header|_.row|
 214          |A|simple|table|row|
 215  
 216      Note: Table captions *must* be the first line of the table else treated as a center-aligned cell.
 217  
 218      Tables with attributes:
 219  
 220          table{border:1px solid black}. My table summary here
 221          {background:#ddd;color:red}. |{}| | | |
 222  
 223      To specify thead / tfoot / tbody groups, add one of these on its own line
 224      above the row(s) you wish to wrap (you may specify attributes before the dot):
 225  
 226          |^.     # thead
 227          |-.     # tbody
 228          |~.     # tfoot
 229  
 230      Column groups:
 231  
 232          |:\3. 100|
 233  
 234          Becomes:
 235              <colgroup span="3" width="100"></colgroup>
 236  
 237          You can omit either or both of the \N or width values. You may also
 238          add cells after the colgroup definition to specify col elements with
 239          span, width, or standard Textile attributes:
 240  
 241          |:. 50|(firstcol). |\2. 250||300|
 242  
 243          Becomes:
 244              <colgroup width="50">
 245                  <col class="firstcol" />
 246                  <col span="2" width="250" />
 247                  <col />
 248                  <col width="300" />
 249              </colgroup>
 250  
 251          (Note that, per the HTML specification, you should not add span
 252          to the colgroup if specifying col elements.)
 253  
 254  Applying Attributes:
 255  
 256      Most anywhere Textile code is used, attributes such as arbitrary css style,
 257      css classes, and ids can be applied. The syntax is fairly consistent.
 258  
 259      The following characters quickly alter the alignment of block elements:
 260  
 261          <  ->  left align     ex. p<. left-aligned para
 262          >  ->  right align         h3>. right-aligned header 3
 263          =  ->  centred             h4=. centred header 4
 264          <> ->  justified         p<>. justified paragraph
 265  
 266      These will change vertical alignment in table cells:
 267  
 268          ^  ->  top           ex. |^. top-aligned table cell|
 269          -  ->  middle           |-. middle aligned|
 270          ~  ->  bottom           |~. bottom aligned cell|
 271  
 272      Plain (parentheses) inserted between block syntax and the closing dot-space
 273      indicate classes and ids:
 274  
 275          p(hector). paragraph -> <p class="hector">paragraph</p>
 276  
 277          p(#fluid). paragraph -> <p id="fluid">paragraph</p>
 278  
 279          (classes and ids can be combined)
 280          p(hector#fluid). paragraph -> <p class="hector" id="fluid">paragraph</p>
 281  
 282      Curly {brackets} insert arbitrary css style
 283  
 284          p{line-height:18px}. paragraph -> <p style="line-height:18px">paragraph</p>
 285  
 286          h3{color:red}. header 3 -> <h3 style="color:red">header 3</h3>
 287  
 288      Square [brackets] insert language attributes
 289  
 290          p[no]. paragraph -> <p lang="no">paragraph</p>
 291  
 292          %[fr]phrase% -> <span lang="fr">phrase</span>
 293  
 294      Usually Textile block element syntax requires a dot and space before the block
 295      begins, but since lists don't, they can be styled just using braces
 296  
 297          #{color:blue} one  ->  <ol style="color:blue">
 298          # big                    <li>one</li>
 299          # list                    <li>big</li>
 300                                  <li>list</li>
 301                                 </ol>
 302  
 303      Using the span tag to style a phrase
 304  
 305          It goes like this, %{color:red}the fourth the fifth%
 306                -> It goes like this, <span style="color:red">the fourth the fifth</span>
 307  
 308  Ordered list start and continuation:
 309  
 310      You can control the start attribute of an ordered list like so;
 311  
 312          #5 Item 5
 313          # Item 6
 314  
 315      You can resume numbering list items after some intervening anonymous block like so...
 316  
 317          #_ Item 7
 318          # Item 8
 319  */
 320  
 321  /**
 322   * Textile parser.
 323   *
 324   * The Parser class takes Textile input and
 325   * converts it to well formatted HTML. This is
 326   * the library's main class, hosting the parsing
 327   * functionality and exposing a simple
 328   * public interface for you to use.
 329   *
 330   * The most basic use case would involve initialising
 331   * an instance of the class and calling the textileThis
 332   * method, parsing the given Textile input in unrestricted
 333   * mode.
 334   *
 335   * <code>
 336   * $parser = new \Netcarver\Textile\Parser();
 337   * echo $parser->textileThis('h1. Hello World!');
 338   * </code>
 339   *
 340   * @see Parser::__construct()
 341   * @see Parser::textileThis()
 342   * @see Parser::textileRestricted()
 343   */
 344  
 345  class Parser
 346  {
 347      /**
 348       * Version number.
 349       *
 350       * @var string
 351       */
 352  
 353      protected $ver = '3.5.5';
 354  
 355      /**
 356       * Regular expression snippets.
 357       *
 358       * @var array
 359       */
 360  
 361      protected $regex_snippets;
 362  
 363      /**
 364       * Pattern for horizontal align.
 365       *
 366       * @var string
 367       */
 368  
 369      protected $hlgn = "(?:\<(?!>)|&lt;&gt;|&gt;|&lt;|(?<!<)\>|\<\>|\=|[()]+(?! ))";
 370  
 371      /**
 372       * Pattern for vertical align.
 373       *
 374       * @var string
 375       */
 376  
 377      protected $vlgn = "[\-^~]";
 378  
 379      /**
 380       * Pattern for HTML classes and IDs.
 381       *
 382       * Does not allow classes/ids/languages/styles to span across
 383       * newlines if used in a dotall regular expression.
 384       *
 385       * @var string
 386       */
 387  
 388      protected $clas = "(?:\([^)\n]+\))";
 389  
 390      /**
 391       * Pattern for language attribute.
 392       *
 393       * @var string
 394       */
 395  
 396      protected $lnge = "(?:\[[^]\n]+\])";
 397  
 398      /**
 399       * Pattern for style attribute.
 400       *
 401       * @var string
 402       */
 403  
 404      protected $styl = "(?:\{[^}\n]+\})";
 405  
 406      /**
 407       * Regular expression pattern for column spans in tables.
 408       *
 409       * @var string
 410       */
 411  
 412      protected $cspn = "(?:\\\\[0-9]+)";
 413  
 414      /**
 415       * Regular expression for row spans in tables.
 416       *
 417       * @var string
 418       */
 419  
 420      protected $rspn = "(?:\/[0-9]+)";
 421  
 422      /**
 423       * Regular expression for horizontal or vertical alignment.
 424       *
 425       * @var string
 426       */
 427  
 428      protected $a;
 429  
 430      /**
 431       * Regular expression for column or row spans in tables.
 432       *
 433       * @var string
 434       */
 435  
 436      protected $s;
 437  
 438      /**
 439       * Pattern that matches a class, style, language and horizontal alignment attributes.
 440       *
 441       * @var string
 442       */
 443  
 444      protected $c;
 445  
 446      /**
 447       * Pattern that matches class, style and language attributes.
 448       *
 449       * Allows all 16 possible permutations of class, style and language attributes.
 450       * <no attribute>, c, cl, cs, cls, csl, l, lc, ls, lcs, lsc, s, sc, sl, scl or slc
 451       *
 452       * @var string
 453       */
 454      protected $cls;
 455  
 456      /**
 457       * Whitelisted block tags.
 458       *
 459       * @var array
 460       */
 461  
 462      protected $blocktag_whitelist = array();
 463  
 464      /**
 465       * Pattern for punctuation.
 466       *
 467       * @var string
 468       */
 469  
 470      protected $pnct = '[\!"#\$%&\'()\*\+,\-\./:;<=>\?@\[\\\]\^_`{\|}\~]';
 471  
 472      /**
 473       * Pattern for URL.
 474       *
 475       * @var string
 476       */
 477  
 478      protected $urlch;
 479  
 480      /**
 481       * Matched marker symbols.
 482       *
 483       * @var string
 484       */
 485  
 486      protected $syms = '¤§µ¶†‡•∗∴◊♠♣♥♦';
 487  
 488      /**
 489       * HTML rel attribute used for links.
 490       *
 491       * @var string
 492       */
 493  
 494      protected $rel;
 495  
 496      /**
 497       * Array of footnotes
 498       *
 499       * @var array
 500       */
 501      protected $fn;
 502  
 503      /**
 504       * Shelved content.
 505       *
 506       * Stores fragments of the source text that have been parsed
 507       * and require no more processing.
 508       *
 509       * @var array
 510       */
 511  
 512      protected $shelf = array();
 513  
 514      /**
 515       * Restricted mode.
 516       *
 517       * @var bool
 518       */
 519  
 520      protected $restricted = false;
 521  
 522      /**
 523       * Disallow images.
 524       *
 525       * @var bool
 526       */
 527  
 528      protected $noimage = false;
 529  
 530      /**
 531       * Lite mode.
 532       *
 533       * @var bool
 534       */
 535  
 536      protected $lite = false;
 537  
 538      /**
 539       * Accepted link protocols.
 540       *
 541       * @var array
 542       */
 543  
 544      protected $url_schemes = array();
 545  
 546      /**
 547       * Restricted link protocols.
 548       *
 549       * @var array
 550       */
 551  
 552      protected $restricted_url_schemes = array(
 553          'http',
 554          'https',
 555          'ftp',
 556          'mailto',
 557      );
 558  
 559      /**
 560       * Unrestricted link protocols.
 561       *
 562       * @var array
 563       */
 564  
 565      protected $unrestricted_url_schemes = array(
 566          'http',
 567          'https',
 568          'ftp',
 569          'mailto',
 570          'file',
 571          'tel',
 572          'callto',
 573          'sftp',
 574      );
 575  
 576      /**
 577       * Span tags.
 578       *
 579       * @var array
 580       */
 581  
 582      protected $span_tags = array(
 583          '*'  => 'strong',
 584          '**' => 'b',
 585          '??' => 'cite',
 586          '_'  => 'em',
 587          '__' => 'i',
 588          '-'  => 'del',
 589          '%'  => 'span',
 590          '+'  => 'ins',
 591          '~'  => 'sub',
 592          '^'  => 'sup',
 593      );
 594  
 595      /**
 596       * Patterns for finding glyphs.
 597       *
 598       * An array of regex patterns used to find text features
 599       * such as apostrophes, fractions and em-dashes. Each
 600       * entry in this array must have a corresponding entry in
 601       * the $glyph_replace array.
 602       *
 603       * @var null|array
 604       * @see Parser::$glyph_replace
 605       */
 606  
 607      protected $glyph_search  = null;
 608  
 609      /**
 610       * Glyph replacements.
 611       *
 612       * An array of replacements used to insert typographic glyphs
 613       * into the text. Each entry must have a corresponding entry in
 614       * the $glyph_search array and may refer to values captured in
 615       * the corresponding search regex.
 616       *
 617       * @var null|array
 618       * @see Parser::$glyph_search
 619       */
 620  
 621      protected $glyph_replace = null;
 622  
 623      /**
 624       * Indicates whether glyph substitution is required.
 625       *
 626       * Dirty flag, set by setSymbol(), indicating the parser needs to
 627       * rebuild the glyph substitutions before the next parse.
 628       *
 629       * @var bool
 630       * @see Parser::setSymbol()
 631       */
 632  
 633      protected $rebuild_glyphs = true;
 634  
 635      /**
 636       * Relative image path.
 637       *
 638       * @var string
 639       */
 640  
 641      protected $relativeImagePrefix = '';
 642  
 643      /**
 644       * Maximum nesting level for inline elements.
 645       *
 646       * @var int
 647       */
 648  
 649      protected $max_span_depth = 5;
 650  
 651      /**
 652       * Server document root.
 653       *
 654       * @var string
 655       */
 656  
 657      protected $doc_root;
 658  
 659      /**
 660       * Target document type.
 661       *
 662       * @var string
 663       */
 664  
 665      protected $doctype;
 666  
 667      /**
 668       * Substitution symbols.
 669       *
 670       * Basic symbols used in textile glyph replacements. To override these, call
 671       * setSymbol method before calling textileThis or textileRestricted.
 672       *
 673       * @var array
 674       * @see Parser::setSymbol()
 675       */
 676  
 677      protected $symbols = array(
 678          'quote_single_open'  => '&#8216;',
 679          'quote_single_close' => '&#8217;',
 680          'quote_double_open'  => '&#8220;',
 681          'quote_double_close' => '&#8221;',
 682          'apostrophe'         => '&#8217;',
 683          'prime'              => '&#8242;',
 684          'prime_double'       => '&#8243;',
 685          'ellipsis'           => '&#8230;',
 686          'emdash'             => '&#8212;',
 687          'endash'             => '&#8211;',
 688          'dimension'          => '&#215;',
 689          'trademark'          => '&#8482;',
 690          'registered'         => '&#174;',
 691          'copyright'          => '&#169;',
 692          'half'               => '&#189;',
 693          'quarter'            => '&#188;',
 694          'threequarters'      => '&#190;',
 695          'degrees'            => '&#176;',
 696          'plusminus'          => '&#177;',
 697          'fn_ref_pattern'     => '<sup{atts}>{marker}</sup>',
 698          'fn_foot_pattern'    => '<sup{atts}>{marker}</sup>',
 699          'nl_ref_pattern'     => '<sup{atts}>{marker}</sup>',
 700      );
 701  
 702      /**
 703       * Dimensionless images flag.
 704       *
 705       * @var bool
 706       */
 707  
 708      protected $dimensionless_images = false;
 709  
 710      /**
 711       * Directory separator.
 712       *
 713       * @var string
 714       */
 715  
 716      protected $ds = '/';
 717  
 718      /**
 719       * Whether mbstring extension is installed.
 720       *
 721       * @var bool
 722       */
 723  
 724      protected $mb;
 725  
 726      /**
 727       * Multi-byte conversion map.
 728       *
 729       * @var array
 730       */
 731  
 732      protected $cmap = array(0x0080, 0xffff, 0, 0xffff);
 733  
 734      /**
 735       * Stores note index.
 736       *
 737       * @var int
 738       */
 739  
 740      protected $note_index = 1;
 741  
 742      /**
 743       * Stores unreferenced notes.
 744       *
 745       * @var array
 746       */
 747  
 748      protected $unreferencedNotes = array();
 749  
 750      /**
 751       * Stores note lists.
 752       *
 753       * @var array
 754       */
 755  
 756      protected $notelist_cache = array();
 757  
 758      /**
 759       * Stores notes.
 760       *
 761       * @var array
 762       */
 763  
 764      protected $notes = array();
 765  
 766      /**
 767       * Stores URL references.
 768       *
 769       * @var array
 770       */
 771  
 772      protected $urlrefs = array();
 773  
 774      /**
 775       * Stores span depth.
 776       *
 777       * @var int
 778       */
 779  
 780      protected $span_depth = 0;
 781  
 782      /**
 783       * Unique ID used for reference tokens.
 784       *
 785       * @var string
 786       */
 787  
 788      protected $uid;
 789  
 790      /**
 791       * Token reference index.
 792       *
 793       * @var int
 794       */
 795  
 796      protected $refIndex = 1;
 797  
 798      /**
 799       * Stores references values.
 800       *
 801       * @var array
 802       */
 803  
 804      protected $refCache = array();
 805  
 806      /**
 807       * Matched open and closed quotes.
 808       *
 809       * @var array
 810       */
 811  
 812      protected $quotes = array(
 813          '"' => '"',
 814          "'" => "'",
 815          '(' => ')',
 816          '{' => '}',
 817          '[' => ']',
 818          '«' => '»',
 819          '»' => '«',
 820          '‹' => '›',
 821          '›' => '‹',
 822          '„' => '“',
 823          '‚' => '‘',
 824          '‘' => '’',
 825          '”' => '“',
 826      );
 827  
 828      /**
 829       * Regular expression that matches starting quotes.
 830       *
 831       * @var string
 832       */
 833  
 834      protected $quote_starts;
 835  
 836      /**
 837       * Ordered list starts.
 838       *
 839       * @var array
 840       */
 841  
 842      protected $olstarts = array();
 843  
 844      /**
 845       * Link prefix.
 846       *
 847       * @var string
 848       */
 849  
 850      protected $linkPrefix;
 851  
 852      /**
 853       * Link index.
 854       *
 855       * @var int
 856       */
 857  
 858      protected $linkIndex = 1;
 859  
 860      /**
 861       * Constructor.
 862       *
 863       * The constructor allows setting options that affect the
 864       * class instance as a whole, such as the output doctype.
 865       * To instruct the parser to return HTML5 markup instead of
 866       * XHTML, set $doctype argument to 'html5'.
 867       *
 868       * <code>
 869       * $parser = new \Netcarver\Textile\Parser('html5');
 870       * echo $parser->textileThis('HTML(HyperText Markup Language)");
 871       * </code>
 872       *
 873       * @param  string $doctype The output document type, either 'xhtml' or 'html5'
 874       * @throws \InvalidArgumentException
 875       * @api
 876       */
 877  
 878      public function __construct($doctype = 'xhtml')
 879      {
 880          $doctypes = array(
 881              'xhtml',
 882              'html5',
 883          );
 884  
 885          if (!in_array($doctype, $doctypes, true)) {
 886              throw new \InvalidArgumentException('Invalid doctype given.');
 887          } else {
 888              $this->doctype = $doctype;
 889          }
 890  
 891          $uid = uniqid(rand());
 892          $this->uid = 'textileRef:'.$uid.':';
 893          $this->linkPrefix = $uid.'-';
 894          $this->a = "(?:$this->hlgn|$this->vlgn)*";
 895          $this->s = "(?:$this->cspn|$this->rspn)*";
 896          $this->c = "(?:$this->clas|$this->styl|$this->lnge|$this->hlgn)*";
 897  
 898          $this->cls = '(?:'.
 899              "$this->clas(?:".
 900                  "$this->lnge(?:$this->styl)?|$this->styl(?:$this->lnge)?".
 901                  ')?|'.
 902              "$this->lnge(?:".
 903                  "$this->clas(?:$this->styl)?|$this->styl(?:$this->clas)?".
 904                  ')?|'.
 905              "$this->styl(?:".
 906                  "$this->clas(?:$this->lnge)?|$this->lnge(?:$this->clas)?".
 907                  ')?'.
 908              ')?';
 909  
 910          if ($this->isUnicodePcreSupported()) {
 911              $this->regex_snippets = array(
 912                  'acr'   => '\p{Lu}\p{Nd}',
 913                  'abr'   => '\p{Lu}',
 914                  'nab'   => '\p{Ll}',
 915                  'wrd'   => '(?:\p{L}|\p{M}|\p{N}|\p{Pc})',
 916                  'mod'   => 'u', // Make sure to mark the unicode patterns as such, Some servers seem to need this.
 917                  'cur'   => '\p{Sc}',
 918                  'digit' => '\p{N}',
 919                  'space' => '(?:\p{Zs}|\h|\v)',
 920                  'char'  => '(?:[^\p{Zs}\h\v])',
 921              );
 922          } else {
 923              $this->regex_snippets = array(
 924                  'acr'   => 'A-Z0-9',
 925                  'abr'   => 'A-Z',
 926                  'nab'   => 'a-z',
 927                  'wrd'   => '\w',
 928                  'mod'   => '',
 929                  'cur'   => '',
 930                  'digit' => '\d',
 931                  'space' => '(?:\s|\h|\v)',
 932                  'char'  => '\S',
 933              );
 934          }
 935          extract($this->regex_snippets);
 936          $this->urlch = '['.$wrd.'"$\-_.+!*\'(),";\/?:@=&%#{}|\\^~\[\]`]';
 937          $this->quote_starts = implode('|', array_map('preg_quote', array_keys($this->quotes)));
 938  
 939          if (defined('DIRECTORY_SEPARATOR')) {
 940              $this->ds = constant('DIRECTORY_SEPARATOR');
 941          }
 942  
 943          if (php_sapi_name() === 'cli') {
 944              $this->doc_root = getcwd();
 945          } elseif (!empty($_SERVER['DOCUMENT_ROOT'])) {
 946              $this->doc_root = $_SERVER['DOCUMENT_ROOT'];
 947          } elseif (!empty($_SERVER['PATH_TRANSLATED'])) {
 948              $this->doc_root = $_SERVER['PATH_TRANSLATED'];
 949          }
 950  
 951          $this->doc_root = rtrim($this->doc_root, $this->ds).$this->ds;
 952      }
 953  
 954      /**
 955       * Defines a substitution symbol.
 956       *
 957       * Call this you need to redefine a substitution symbol to
 958       * be used when parsing a Textile document.
 959       *
 960       * @param  string $name  Name of the symbol to assign a new value to.
 961       * @param  string $value New value for the symbol.
 962       * @return Parser
 963       * @api
 964       */
 965  
 966      public function setSymbol($name, $value)
 967      {
 968          $this->symbols[$name] = $value;
 969          $this->rebuild_glyphs = true;
 970          return $this;
 971      }
 972  
 973      /**
 974       * Gets a symbol definitions.
 975       *
 976       * This method can be used to get a symbol definition, or an
 977       * array containing the full symbol table.
 978       *
 979       * @param  string|null  $name The name of the symbol, or NULL if requesting the symbol table
 980       * @return array|string The symbol table or the requested symbol
 981       * @throws \InvalidArgumentException
 982       * @api
 983       */
 984  
 985      public function getSymbol($name = null)
 986      {
 987          if ($name !== null) {
 988              if (isset($this->symbols[$name])) {
 989                  return $this->symbols[$name];
 990              }
 991  
 992              throw new \InvalidArgumentException('The specified name does not match any symbols.');
 993          }
 994  
 995          return $this->symbols;
 996      }
 997  
 998      /**
 999       * Sets base image directory path.
1000       *
1001       * This is used when Textile is supplied with a relative image path.
1002       * Allows client systems to have PHP-Textile convert relative image paths to
1003       * absolute or prefixed paths. This method is used to set that base path,
1004       * usually a absolute HTTP address pointing to a directory.
1005       *
1006       * <code>
1007       * $parser = new \Netcarver\Textile\Parser();
1008       * $parser->setRelativeImagePrefix('http://static.example.com/');
1009       * </code>
1010       *
1011       * @param  string $prefix  The string to prefix all relative image paths with
1012       * @return Parser
1013       * @api
1014       */
1015  
1016      public function setRelativeImagePrefix($prefix = '')
1017      {
1018          $this->relativeImagePrefix = $prefix;
1019          return $this;
1020      }
1021  
1022      /**
1023       * Toggles image dimension attributes.
1024       *
1025       * If $dimensionless is set to TRUE, image width and height attributes
1026       * will not be included in rendered image tags. Normally, Textile will add
1027       * dimensions height images that specify a relative path, as long
1028       * as the image file can be accessed.
1029       *
1030       * <code>
1031       * $parser = new \Netcarver\Textile\Parser();
1032       * echo $parser->setDimensionlessImages(false)->textileThis('Hello World!');
1033       * </code>
1034       *
1035       * @param  bool   $dimensionless TRUE to disable image dimensions, FALSE to enable
1036       * @return Parser
1037       * @api
1038       */
1039  
1040      public function setDimensionlessImages($dimensionless = true)
1041      {
1042          $this->dimensionless_images = (bool) $dimensionless;
1043          return $this;
1044      }
1045  
1046      /**
1047       * Whether images will get dimensions or not.
1048       *
1049       * This method will return the state of
1050       * the state of the $dimensionless_images property.
1051       *
1052       * <code>
1053       * $parser = new \Netcarver\Textile\Parser();
1054       * if ($parser->getDimensionlessImages() === true)
1055       * {
1056       *     echo 'Images do not get dimensions.';
1057       * }
1058       * </code>
1059       *
1060       * @return bool TRUE if images will not get dimensions, FALSE otherwise
1061       * @api
1062       */
1063  
1064      public function getDimensionlessImages()
1065      {
1066          return (bool) $this->dimensionless_images;
1067      }
1068  
1069      /**
1070       * Gets Textile version number.
1071       *
1072       * <code>
1073       * $parser = new \Netcarver\Textile\Parser();
1074       * echo $parser->getVersion();
1075       * </code>
1076       *
1077       * @return string Version
1078       * @api
1079       */
1080  
1081      public function getVersion()
1082      {
1083          return $this->ver;
1084      }
1085  
1086      /**
1087       * Encodes the given text.
1088       *
1089       * <code>
1090       * $parser = new \Netcarver\Textile\Parser();
1091       * $parser->textileEncode('Some content to encode.');
1092       * </code>
1093       *
1094       * @param  string $text The text to be encoded
1095       * @return string The encoded text
1096       * @api
1097       */
1098  
1099      public function textileEncode($text)
1100      {
1101          $text = preg_replace("/&(?![#a-z0-9]+;)/i", "x%x%", $text);
1102          $text = str_replace("x%x%", "&amp;", $text);
1103          return $text;
1104      }
1105  
1106      /**
1107       * Parses the given Textile input in un-restricted mode.
1108       *
1109       * This method should be used to parse any trusted Textile
1110       * input, such as articles created by well-known
1111       * authorised users.
1112       *
1113       * This method allows users to mix raw HTML and Textile.
1114       * If you want to parse untrusted input, see the
1115       * textileRestricted method instead. Using this less
1116       * restrictive method on untrusted input, like comments
1117       * and forum posts, will lead to XSS issues, as users
1118       * will be able to use any HTML code, JavaScript links
1119       * and Textile attributes in their input.
1120       *
1121       * <code>
1122       * $parser = new \Netcarver\Textile\Parser();
1123       * echo $parser->textileThis('h1. Hello World!');
1124       * </code>
1125       *
1126       * @param  string $text    The Textile input to parse
1127       * @param  bool   $lite    Switch to lite mode
1128       * @param  bool   $encode  Encode input and return
1129       * @param  bool   $noimage Disables images
1130       * @param  bool   $strict  This argument is ignored
1131       * @param  string $rel     Relationship attribute applied to generated links
1132       * @return string Parsed $text
1133       * @see    Parser::textileRestricted()
1134       * @api
1135       */
1136  
1137      public function textileThis($text, $lite = false, $encode = false, $noimage = false, $strict = false, $rel = '')
1138      {
1139          $this->prepare($lite, $noimage, $rel);
1140          $this->url_schemes = $this->unrestricted_url_schemes;
1141  
1142          if ($encode) {
1143              trigger_error(
1144                  'Use of the $encode argument is discouraged. Use Parser::textileEncode() instead.',
1145                  E_USER_DEPRECATED
1146              );
1147              return $this->textileEncode($text);
1148          }
1149  
1150          return $this->textileCommon($text, $lite);
1151      }
1152  
1153      /**
1154       * Parses the given Textile input in restricted mode.
1155       *
1156       * This method should be used for any untrusted user input,
1157       * including comments or forum posts.
1158       *
1159       * This method escapes any raw HTML input, ignores unsafe
1160       * attributes, links only whitelisted URL schemes
1161       * and by default also prevents the use of images and
1162       * extra Textile formatting, accepting only paragraphs
1163       * and blockquotes as valid block tags.
1164       *
1165       * <code>
1166       * $parser = new \Netcarver\Textile\Parser();
1167       * echo $parser->textileRestricted('h1. Hello World!');
1168       * </code>
1169       *
1170       * @param  string $text    The Textile input to parse
1171       * @param  bool   $lite    Controls lite mode, allowing extra formatting
1172       * @param  bool   $noimage Allow images
1173       * @param  string $rel     Relationship attribute applied to generated links
1174       * @return string Parsed $text
1175       * @see    Parser::textileThis()
1176       * @api
1177       */
1178  
1179      public function textileRestricted($text, $lite = true, $noimage = true, $rel = 'nofollow')
1180      {
1181          $this->prepare($lite, $noimage, $rel);
1182          $this->url_schemes = $this->restricted_url_schemes;
1183          $this->restricted = true;
1184  
1185          // Escape any raw html
1186          $text = $this->encodeHTML($text, 0);
1187  
1188          return $this->textileCommon($text, $lite);
1189      }
1190  
1191      /**
1192       * Parses Textile syntax.
1193       *
1194       * This method performs common parse actions.
1195       *
1196       * @param  string $text The input to parses
1197       * @param  bool   $lite Controls lite mode
1198       * @return string Parsed input
1199       */
1200  
1201      protected function textileCommon($text, $lite)
1202      {
1203          $text = $this->cleanWhiteSpace($text);
1204          $text = $this->cleanUniqueTokens($text);
1205  
1206          if ($lite) {
1207              $this->blocktag_whitelist = array('bq', 'p');
1208              $text = $this->blocks($text."\n\n");
1209          } else {
1210              $this->blocktag_whitelist = array(
1211                  'bq',
1212                  'p',
1213                  'bc',
1214                  'notextile',
1215                  'pre',
1216                  'h[1-6]',
1217                  'fn'.$this->regex_snippets['digit'].'+',
1218                  '###',
1219              );
1220              $text = $this->blocks($text);
1221              $text = $this->placeNoteLists($text);
1222          }
1223  
1224          $text = $this->retrieve($text);
1225          $text = $this->replaceGlyphs($text);
1226          $text = $this->retrieveTags($text);
1227          $text = $this->retrieveURLs($text);
1228  
1229          $text = str_replace("<br />", "<br />\n", $text);
1230  
1231          return $text;
1232      }
1233  
    /**
     * Prepares the glyph patterns from the symbol table.
     *
     * Fills $this->glyph_search with regular expressions and
     * $this->glyph_replace with the matching replacements, in the same
     * order. The work is skipped when $this->rebuild_glyphs is FALSE, and
     * the flag is reset to FALSE once the patterns have been built.
     *
     * @see Parser::setSymbol()
     * @see Parser::getSymbol()
     */

    protected function prepGlyphs()
    {
        // Patterns are still current: nothing to do.
        if ($this->rebuild_glyphs === false) {
            return;
        }

        // Import the symbol table as $txt_* locals (e.g. $txt_apostrophe).
        extract($this->symbols, EXTR_PREFIX_ALL, 'txt');
        // Import the regex snippets as locals. The variables used below
        // ($cur, $space, $wrd, $mod, $abr, $acr, $nab) are not defined
        // anywhere else in this method, so they are expected to come from here.
        extract($this->regex_snippets);
        $pnc = '[[:punct:]]';

        // Turn a non-empty currency snippet into an optional
        // "currency character, then optional spaces" group.
        if ($cur) {
            $cur = '(?:['.$cur.']'.$space.'*)?';
        }

        // Start from empty lists; search and replace entries are appended in pairs.
        $this->glyph_search = array();
        $this->glyph_replace = array();

        // Dimension sign
        $this->glyph_search[] = '/([0-9]+[\])]?[\'"]? ?)[xX]( ?[\[(]?)(?=[+-]?'.$cur.'[0-9]*\.?[0-9]+)/'.$mod;
        $this->glyph_replace[] = '$1'.$txt_dimension.'$2';

        // Apostrophe
        $this->glyph_search[] = '/('.$wrd.'|\))\'('.$wrd.')/'.$mod;
        $this->glyph_replace[] = '$1'.$txt_apostrophe.'$2';

        // Back in '88/the '90s but not in his '90s', '1', '1.' '10m' or '5.png'
        $this->glyph_search[] = '/('.$space.')\'(\d+'.$wrd.'?)\b(?![.]?['.$wrd.']*?\')/'.$mod;
        $this->glyph_replace[] = '$1'.$txt_apostrophe.'$2';

        // Single open following open bracket
        $this->glyph_search[] = "/([([{])'(?=\S)/".$mod;
        $this->glyph_replace[] = '$1'.$txt_quote_single_open;

        // Single closing
        $this->glyph_search[] = '/(\S)\'(?='.$space.'|'.$pnc.'|<|$)/'.$mod;
        $this->glyph_replace[] = '$1'.$txt_quote_single_close;

        // Default single opening
        $this->glyph_search[] = "/'/";
        $this->glyph_replace[] = $txt_quote_single_open;

        // Double open following an open bracket. Allows things like Hello ["(Mum) & dad"]
        $this->glyph_search[] = '/([([{])"(?=\S)/'.$mod;
        $this->glyph_replace[] = '$1'.$txt_quote_double_open;

        // Double closing
        $this->glyph_search[] = '/(\S)"(?='.$space.'|'.$pnc.'|<|$)/'.$mod;
        $this->glyph_replace[] = '$1'.$txt_quote_double_close;

        // Default double opening
        $this->glyph_search[] = '/"/';
        $this->glyph_replace[] = $txt_quote_double_open;

        // 3+ uppercase acronym
        $this->glyph_search[] = '/\b(['.$abr.']['.$acr.']{2,})\b(?:[(]([^)]*)[)])/'.$mod;

        // The acronym's replacement depends on the doctype: <abbr> for
        // html5, <acronym> otherwise.
        if ($this->doctype === 'html5') {
            $this->glyph_replace[] = '<abbr title="$2">$1</abbr>';
        } else {
            $this->glyph_replace[] = '<acronym title="$2">$1</acronym>';
        }

        // 3+ uppercase
        $this->glyph_search[] = '/('.$space.'|^|[>(;-])(['.$abr.']{3,})'.
            '(['.$nab.']*)(?='.$space.'|'.$pnc.'|<|$)(?=[^">]*?(<|$))/'.$mod;
        // The matched caps are prefixed with a "<uid>:glyph:" marker inside the span.
        $this->glyph_replace[] = '$1<span class="caps">'.$this->uid.':glyph:$2</span>$3';

        // Ellipsis
        $this->glyph_search[] = '/([^.]?)\.{3}/';
        $this->glyph_replace[] = '$1'.$txt_ellipsis;

        // em dash
        $this->glyph_search[] = '/--/';
        $this->glyph_replace[] = $txt_emdash;

        // en dash
        $this->glyph_search[] = '/ - /';
        $this->glyph_replace[] = ' '.$txt_endash.' ';

        // Trademark
        $this->glyph_search[] = '/(\b ?|'.$space.'|^)[([]TM[])]/i'.$mod;
        $this->glyph_replace[] = '$1'.$txt_trademark;

        // Registered
        $this->glyph_search[] = '/(\b ?|'.$space.'|^)[([]R[])]/i'.$mod;
        $this->glyph_replace[] = '$1'.$txt_registered;

        // Copyright
        $this->glyph_search[] = '/(\b ?|'.$space.'|^)[([]C[])]/i'.$mod;
        $this->glyph_replace[] = '$1'.$txt_copyright;

        // 1/4
        $this->glyph_search[] = '/[([]1\/4[])]/';
        $this->glyph_replace[] = $txt_quarter;

        // 1/2
        $this->glyph_search[] = '/[([]1\/2[])]/';
        $this->glyph_replace[] = $txt_half;

        // 3/4
        $this->glyph_search[] = '/[([]3\/4[])]/';
        $this->glyph_replace[] = $txt_threequarters;

        // Degrees -- that's a small 'oh'
        $this->glyph_search[] = '/[([]o[])]/';
        $this->glyph_replace[] = $txt_degrees;

        // Plus minus
        $this->glyph_search[] = '/[([]\+\/-[])]/';
        $this->glyph_replace[] = $txt_plusminus;

        // No need to rebuild next run unless a symbol is redefined
        $this->rebuild_glyphs = false;
    }
1355  
1356      /**
1357       * Sets the maximum allowd link index.
1358       *
1359       * @return int
1360       * @since 3.5.5
1361       */
1362  
1363      protected function getMaxLinkIndex()
1364      {
1365          return 1000000;
1366      }
1367  
1368      /**
1369       * Prepares the parser for parsing.
1370       *
1371       * This method prepares the transient internal state of
1372       * Textile parser in preparation for parsing a new document.
1373       *
1374       * @param  bool   $lite    Controls lite mode
1375       * @param  bool   $noimage Disallow images
1376       * @param  string $rel     A relationship attribute applied to links
1377       */
1378  
1379      protected function prepare($lite, $noimage, $rel)
1380      {
1381          if ($this->linkIndex >= $this->getMaxLinkIndex()) {
1382              $this->linkPrefix .= '-';
1383              $this->linkIndex = 1;
1384          }
1385  
1386          $this->unreferencedNotes = array();
1387          $this->notelist_cache    = array();
1388          $this->notes      = array();
1389          $this->urlrefs    = array();
1390          $this->shelf      = array();
1391          $this->fn         = array();
1392          $this->span_depth = 0;
1393          $this->refIndex   = 1;
1394          $this->refCache   = array();
1395          $this->note_index = 1;
1396          $this->rel        = $rel;
1397          $this->lite       = $lite;
1398          $this->noimage    = $noimage;
1399          $this->prepGlyphs();
1400      }
1401  
1402      /**
1403       * Cleans a HTML attribute value.
1404       *
1405       * This method checks for presence of URL encoding in the value.
1406       * If the number encoded characters exceeds the thereshold,
1407       * the input is discarded. Otherwise the encoded
1408       * instances are decoded.
1409       *
1410       * This method also strips any ", ' and = characters
1411       * from the given value. This method does not guarantee
1412       * valid HTML or full sanitization.
1413       *
1414       * @param  string $in The input string
1415       * @return string Cleaned string
1416       */
1417  
1418      protected function cleanAttribs($in)
1419      {
1420          $tmp    = $in;
1421          $before = -1;
1422          $after  =  0;
1423          $max    =  3;
1424          $i      =  0;
1425  
1426          while (($after != $before) && ($i < $max)) {
1427              $before = strlen($tmp);
1428              $tmp    = rawurldecode($tmp);
1429              $after  = strlen($tmp);
1430              $i++;
1431          }
1432  
1433          if ($i === $max) {
1434              // If we hit the max allowed decodes, assume the input is tainted and consume it.
1435              $out = '';
1436          } else {
1437              $out = str_replace(array('"', "'", '='), '', $tmp);
1438          }
1439  
1440          return $out;
1441      }
1442  
1443      /**
1444       * Constructs a HTML tag from an object.
1445       *
1446       * This is a helper method that creates a new
1447       * instance of \Netcarver\Textile\Tag.
1448       *
1449       * @param  string $name        The HTML element name
1450       * @param  array  $atts        HTML attributes applied to the tag
1451       * @param  bool   $selfclosing Determines if the tag should be selfclosing
1452       * @return Tag
1453       */
1454  
1455      protected function newTag($name, $atts, $selfclosing = true)
1456      {
1457          return new Tag($name, $atts, $selfclosing);
1458      }
1459  
1460      /**
1461       * Parses Textile attributes.
1462       *
1463       * @param  string $in         The Textile attribute string to be parsed
1464       * @param  string $element    Focus the routine to interpret the attributes as applying to a specific HTML tag
1465       * @param  bool   $include_id If FALSE, IDs are not included in the attribute list
1466       * @param  string $autoclass  An additional classes applied to the output
1467       * @return string HTML attribute list
1468       * @see    Parser::parseAttribsToArray()
1469       */
1470  
1471      protected function parseAttribs($in, $element = '', $include_id = true, $autoclass = '')
1472      {
1473          $o = $this->parseAttribsToArray($in, $element, $include_id, $autoclass);
1474  
1475          return $this->formatAttributeString($o);
1476      }
1477  
1478      /**
1479       * Converts an array of named attribute => value mappings to a string.
1480       *
1481       * @param array $attribute_array
1482       * @return string
1483       */
1484  
1485      protected function formatAttributeString(array $attribute_array)
1486      {
1487          $out = '';
1488  
1489          if (count($attribute_array)) {
1490              foreach ($attribute_array as $k => $v) {
1491                  $out .= " $k=\"$v\"";
1492              }
1493          }
1494  
1495          return $out;
1496      }
1497  
1498      /**
1499       * Parses Textile attributes into an array.
1500       *
1501       * @param  string $in         The Textile attribute string to be parsed
1502       * @param  string $element    Focus the routine to interpret the attributes as applying to a specific HTML tag
1503       * @param  bool   $include_id If FALSE, IDs are not included in the attribute list
1504       * @param  string $autoclass  An additional classes applied to the output
1505       * @return array  HTML attributes as key => value mappings
1506       * @see    Parser::parseAttribs()
1507       */
1508  
1509      protected function parseAttribsToArray($in, $element = '', $include_id = true, $autoclass = '')
1510      {
1511          $style = '';
1512          $class = '';
1513          $lang = '';
1514          $colspan = '';
1515          $rowspan = '';
1516          $span = '';
1517          $width = '';
1518          $id = '';
1519          $atts = '';
1520          $align = '';
1521  
1522          $matched = $in;
1523          if ($element == 'td') {
1524              if (preg_match("/\\\\([0-9]+)/", $matched, $csp)) {
1525                  $colspan = $csp[1];
1526              }
1527  
1528              if (preg_match("/\/([0-9]+)/", $matched, $rsp)) {
1529                  $rowspan = $rsp[1];
1530              }
1531          }
1532  
1533          if ($element == 'td' or $element == 'tr') {
1534              if (preg_match("/($this->vlgn)/", $matched, $vert)) {
1535                  $style[] = "vertical-align:" . $this->vAlign($vert[1]);
1536              }
1537          }
1538  
1539          if (preg_match("/\{([^}]*)\}/", $matched, $sty)) {
1540              if ($sty[1] = $this->cleanAttribs($sty[1])) {
1541                  $style[] = rtrim($sty[1], ';');
1542              }
1543              $matched = str_replace($sty[0], '', $matched);
1544          }
1545  
1546          if (preg_match("/\[([^]]+)\]/U", $matched, $lng)) {
1547              // Consume entire lang block -- valid or invalid.
1548              $matched = str_replace($lng[0], '', $matched);
1549              if (preg_match("/\[([a-zA-Z]{2}(?:[\-\_][a-zA-Z]{2})?)\]/U", $lng[0], $lng)) {
1550                  $lang = $lng[1];
1551              }
1552          }
1553  
1554          if (preg_match("/\(([^()]+)\)/U", $matched, $cls)) {
1555  
1556              $class_regex = "/^([-a-zA-Z 0-9_\.]*)$/";
1557  
1558              // Consume entire class block -- valid or invalid.
1559              $matched = str_replace($cls[0], '', $matched);
1560  
1561              // Only allow a restricted subset of the CSS standard characters for classes/ids.
1562              // No encoding markers allowed.
1563              if (preg_match("/\(([-a-zA-Z 0-9_\.\:\#]+)\)/U", $cls[0], $cls)) {
1564                  $hashpos = strpos($cls[1], '#');
1565                  // If a textile class block attribute was found with a '#' in it
1566                  // split it into the css class and css id...
1567                  if (false !== $hashpos) {
1568                      if (preg_match("/#([-a-zA-Z0-9_\.\:]*)$/", substr($cls[1], $hashpos), $ids)) {
1569                          $id = $ids[1];
1570                      }
1571  
1572                      if (preg_match($class_regex, substr($cls[1], 0, $hashpos), $ids)) {
1573                          $class = $ids[1];
1574                      }
1575                  } else {
1576                      if (preg_match($class_regex, $cls[1], $ids)) {
1577                          $class = $ids[1];
1578                      }
1579                  }
1580              }
1581          }
1582  
1583          if (preg_match("/([(]+)/", $matched, $pl)) {
1584              $style[] = "padding-left:" . strlen($pl[1]) . "em";
1585              $matched = str_replace($pl[0], '', $matched);
1586          }
1587  
1588          if (preg_match("/([)]+)/", $matched, $pr)) {
1589              $style[] = "padding-right:" . strlen($pr[1]) . "em";
1590              $matched = str_replace($pr[0], '', $matched);
1591          }
1592  
1593          if (preg_match("/($this->hlgn)/", $matched, $horiz)) {
1594              $style[] = "text-align:" . $this->hAlign($horiz[1]);
1595          }
1596  
1597          if ($element == 'col') {
1598              if (preg_match("/(?:\\\\([0-9]+))?{$this->regex_snippets['space']}*([0-9]+)?/", $matched, $csp)) {
1599                  $span = isset($csp[1]) ? $csp[1] : '';
1600                  $width = isset($csp[2]) ? $csp[2] : '';
1601              }
1602          }
1603  
1604          if ($this->restricted) {
1605              $o = array();
1606              $class = trim($autoclass);
1607              if ($class) {
1608                  $o['class'] = $this->cleanAttribs($class);
1609              }
1610  
1611              if ($lang) {
1612                  $o['lang']  = $this->cleanAttribs($lang);
1613              }
1614  
1615              ksort($o);
1616              return $o;
1617          } else {
1618              $class = trim($class . ' ' . $autoclass);
1619          }
1620  
1621          $o = array();
1622          if ($class) {
1623              $o['class'] = $this->cleanAttribs($class);
1624          }
1625  
1626          if ($colspan) {
1627              $o['colspan'] = $this->cleanAttribs($colspan);
1628          }
1629  
1630          if ($id && $include_id) {
1631              $o['id'] = $this->cleanAttribs($id);
1632          }
1633  
1634          if ($lang) {
1635              $o['lang'] = $this->cleanAttribs($lang);
1636          }
1637  
1638          if ($rowspan) {
1639              $o['rowspan'] = $this->cleanAttribs($rowspan);
1640          }
1641  
1642          if ($span) {
1643              $o['span'] = $this->cleanAttribs($span);
1644          }
1645  
1646          if ($style) {
1647              $so = '';
1648              $tmps = array();
1649              foreach ($style as $s) {
1650                  $parts = explode(';', $s);
1651                  foreach ($parts as $p) {
1652                      if ($p = trim(trim($p), ":")) {
1653                          $tmps[] = $p;
1654                      }
1655                  }
1656              }
1657  
1658              sort($tmps);
1659              foreach ($tmps as $p) {
1660                  if (!empty($p)) {
1661                      $so .= $p.';';
1662                  }
1663              }
1664              $style = trim(str_replace(array("\n", ';;'), array('', ';'), $so));
1665  
1666              $o['style'] = $style;
1667          }
1668  
1669          if ($width) {
1670              $o['width'] = $this->cleanAttribs($width);
1671          }
1672  
1673          ksort($o);
1674          return $o;
1675      }
1676  
1677      /**
1678       * Checks whether the text is not enclosed by a block tag.
1679       *
1680       * @param  string $text The input string
1681       * @return bool   TRUE if the text is not enclosed
1682       */
1683  
1684      protected function hasRawText($text)
1685      {
1686          $r = preg_replace(
1687              '@<(p|hr|br|img|blockquote|div|form|table|ul|ol|dl|pre|h[1-6])[^>]*?'.chr(62).'.*</\1[^>]*?>@si',
1688              '',
1689              trim($text)
1690          );
1691          $r = trim(preg_replace('@<(br|hr|img)[^>]*?/?>@i', '', trim($r)));
1692          return '' != $r;
1693      }
1694  
1695      /**
1696       * Parses textile table structures into HTML.
1697       *
1698       * @param  string $text The textile input
1699       * @return string The parsed text
1700       */
1701  
1702      protected function tables($text)
1703      {
1704          $text = $text . "\n\n";
1705          return preg_replace_callback(
1706              "/^(?:table(?P<tatts>_?{$this->s}{$this->a}{$this->cls})\.".
1707              "(?P<summary>.*)?\n)?^(?P<rows>{$this->a}{$this->cls}\.? ?\|.*\|){$this->regex_snippets['space']}*\n\n/smU",
1708              array(&$this, "fTable"),
1709              $text
1710          );
1711      }
1712  
1713      /**
1714       * Constructs a HTML table from a textile table structure.
1715       *
1716       * This method is used by Parser::tables() to process
1717       * found table structures.
1718       *
1719       * @param  array  $matches
1720       * @return string HTML table
1721       * @see    Parser::tables()
1722       */
1723  
1724      protected function fTable($matches)
1725      {
1726          $tatts = $this->parseAttribs($matches['tatts'], 'table');
1727          $space = $this->regex_snippets['space'];
1728  
1729          $sum = trim($matches['summary'])
1730              ? ' summary="'.htmlspecialchars(trim($matches['summary']), ENT_QUOTES, 'UTF-8').'"'
1731              : '';
1732          $cap = '';
1733          $colgrp = '';
1734          $last_rgrp = '';
1735          $c_row = 1;
1736  
1737          foreach (preg_split("/\|{$space}*?$/m", $matches['rows'], -1, PREG_SPLIT_NO_EMPTY) as $row) {
1738  
1739              $row = ltrim($row);
1740  
1741              // Caption -- can only occur on row 1, otherwise treat '|=. foo |...'
1742              // as a normal center-aligned cell.
1743              if (($c_row <= 1) && preg_match(
1744                  "/^\|\=(?P<capts>$this->s$this->a$this->cls)\. (?P<cap>[^\n]*)(?P<row>.*)/s",
1745                  ltrim($row),
1746                  $cmtch
1747              )) {
1748                  $capts = $this->parseAttribs($cmtch['capts']);
1749                  $cap = "\t<caption".$capts.">".trim($cmtch['cap'])."</caption>\n";
1750                  $row = ltrim($cmtch['row']);
1751                  if (empty($row)) {
1752                      continue;
1753                  }
1754              }
1755  
1756              $c_row += 1;
1757  
1758              // Colgroup
1759              if (preg_match("/^\|:(?P<cols>$this->s$this->a$this->cls\. .*)/m", ltrim($row), $gmtch)) {
1760                  // Is this colgroup def missing a closing pipe? If so, there
1761                  // will be a newline in the middle of $row somewhere.
1762                  $nl = strpos($row, "\n");
1763                  $idx = 0;
1764  
1765                  foreach (explode('|', str_replace('.', '', $gmtch['cols'])) as $col) {
1766                      $gatts = $this->parseAttribs(trim($col), 'col');
1767                      $colgrp .= "\t<col".(($idx==0) ? "group".$gatts.">" : $gatts." />")."\n";
1768                      $idx++;
1769                  }
1770  
1771                  $colgrp .= "\t</colgroup>\n";
1772  
1773                  if ($nl === false) {
1774                      continue;
1775                  } else {
1776                      // Recover from our missing pipe and process the rest of the line.
1777                      $row = ltrim(substr($row, $nl));
1778                  }
1779              }
1780  
1781              // Row group
1782              $rgrpatts = $rgrp = '';
1783  
1784              if (preg_match(
1785                  "/(:?^\|(?P<part>$this->vlgn)(?P<rgrpatts>$this->s$this->a$this->cls)\.{$space}*$\n)?^(?P<row>.*)/sm",
1786                  ltrim($row),
1787                  $grpmatch
1788              )) {
1789                  if (isset($grpmatch['part'])) {
1790                      if ($grpmatch['part'] === '^') {
1791                          $rgrp = 'head';
1792                      } elseif ($grpmatch['part'] === '~') {
1793                          $rgrp = 'foot';
1794                      } elseif ($grpmatch['part'] === '-') {
1795                          $rgrp = 'body';
1796                      }
1797                  }
1798  
1799                  if (isset($grpmatch['part'])) {
1800                      $rgrpatts = $this->parseAttribs($grpmatch['rgrpatts']);
1801                  }
1802  
1803                  if (isset($grpmatch['row'])) {
1804                      $row = $grpmatch['row'];
1805                  }
1806              }
1807  
1808              if (preg_match("/^(?P<ratts>$this->a$this->cls\. )(?P<row>.*)/m", ltrim($row), $rmtch)) {
1809                  $ratts = $this->parseAttribs($rmtch['ratts'], 'tr');
1810                  $row = $rmtch['row'];
1811              } else {
1812                  $ratts = '';
1813              }
1814  
1815              $cells = array();
1816              $cellctr = 0;
1817  
1818              foreach (explode("|", $row) as $cell) {
1819                  $ctyp = "d";
1820  
1821                  if (preg_match("/^_(?=[{$this->regex_snippets['space']}[:punct:]])/", $cell)) {
1822                      $ctyp = "h";
1823                  }
1824  
1825                  if (preg_match("/^(?P<catts>_?$this->s$this->a$this->cls\. )(?P<cell>.*)/s", $cell, $cmtch)) {
1826                      $catts = $this->parseAttribs($cmtch['catts'], 'td');
1827                      $cell = $cmtch['cell'];
1828                  } else {
1829                      $catts = '';
1830                  }
1831  
1832                  if (!$this->lite) {
1833                      $a = array();
1834  
1835                      if (preg_match('/(?<space>'.$this->regex_snippets['space'].'*)(?P<cell>.*)/s', $cell, $a)) {
1836                          $cell = $this->redclothLists($a['cell']);
1837                          $cell = $this->textileLists($cell);
1838                          $cell = $a['space'] . $cell;
1839                      }
1840                  }
1841  
1842                  if ($cellctr > 0) {
1843                      // Ignore first 'cell': it precedes the opening pipe
1844                      $cells[] = $this->doTagBr("t$ctyp", "\t\t\t<t$ctyp$catts>$cell</t$ctyp>");
1845                  }
1846  
1847                  $cellctr++;
1848              }
1849  
1850              $grp = '';
1851  
1852              if ($rgrp && $last_rgrp) {
1853                  $grp .= "\t</t".$last_rgrp.">\n";
1854              }
1855  
1856              if ($rgrp) {
1857                  $grp .= "\t<t".$rgrp.$rgrpatts.">\n";
1858              }
1859  
1860              $last_rgrp = ($rgrp) ? $rgrp : $last_rgrp;
1861              $rows[] = $grp."\t\t<tr$ratts>\n" . join("\n", $cells) . ($cells ? "\n" : "") . "\t\t</tr>";
1862              unset($cells, $catts);
1863          }
1864  
1865          $rows = join("\n", $rows) . "\n";
1866          $close = '';
1867  
1868          if ($last_rgrp) {
1869              $close = "\t</t".$last_rgrp.">\n";
1870          }
1871  
1872          return "<table{$tatts}{$sum}>\n".$cap.$colgrp.$rows.$close."</table>\n\n";
1873      }
1874  
1875      /**
1876       * Parses RedCloth-style definition lists into HTML.
1877       *
1878       * @param  string $text The textile input
1879       * @return string The parsed text
1880       */
1881  
1882      protected function redclothLists($text)
1883      {
1884          return preg_replace_callback(
1885              "/^([-]+$this->cls[ .].*:=.*)$(?![^-])/smU",
1886              array(&$this, "fRedclothList"),
1887              $text
1888          );
1889      }
1890  
1891      /**
1892       * Constructs a HTML definition list from a RedCloth-style definition structure.
1893       *
1894       * This method is used by Parser::redclothLists() to process
1895       * found definition list structures.
1896       *
1897       * @param  array  $m
1898       * @return string HTML definition list
1899       * @see    Parser::redclothLists()
1900       */
1901  
1902      protected function fRedclothList($m)
1903      {
1904          $in = $m[0];
1905          $out = array();
1906          $text = preg_split('/\n(?=[-])/m', $in);
1907          foreach ($text as $nr => $line) {
1908              $m = array();
1909              if (preg_match("/^[-]+(?P<atts>$this->cls)\.? (?P<content>.*)$/s", $line, $m)) {
1910                  $content = trim($m['content']);
1911                  $atts = $this->parseAttribs($m['atts']);
1912  
1913                  if (!preg_match(
1914                      "/^(.*?){$this->regex_snippets['space']}*:=(.*?)".
1915                      "{$this->regex_snippets['space']}*(=:|:=)?".
1916                      "{$this->regex_snippets['space']}*$/s",
1917                      $content,
1918                      $xm
1919                  )) {
1920                      $xm = array( $content, $content, '' );
1921                  }
1922  
1923                  list(, $term, $def,) = $xm;
1924                  $term = trim($term);
1925                  $def  = trim($def, ' ');
1926  
1927                  if (empty($out)) {
1928                      if (''==$def) {
1929                          $out[] = "<dl$atts>";
1930                      } else {
1931                          $out[] = '<dl>';
1932                      }
1933                  }
1934  
1935                  if ('' != $term) {
1936                      $pos = strpos($def, "\n");
1937                      $def = str_replace("\n", "<br />", trim($def));
1938                      if (0 === $pos) {
1939                          $def  = '<p>' . $def . '</p>';
1940                      }
1941                      $term = str_replace("\n", "<br />", $term);
1942  
1943                      $term = $this->graf($term);
1944                      $def  = $this->graf($def);
1945  
1946                      $out[] = "\t<dt$atts>$term</dt>";
1947  
1948                      if ($def) {
1949                          $out[] = "\t<dd>$def</dd>";
1950                      }
1951                  }
1952              }
1953          }
1954          $out[] = '</dl>';
1955          return implode("\n", $out);
1956      }
1957  
1958      /**
1959       * Parses Textile list structures into HTML.
1960       *
1961       * Searches for ordered, un-ordered and definition lists in the
1962       * textile input and generates HTML lists for them.
1963       *
1964       * @param  string $text The input
1965       * @return string The parsed text
1966       */
1967  
1968      protected function textileLists($text)
1969      {
1970          return preg_replace_callback(
1971              "/^((?:[*;:]+|[*;:#]*#(?:_|\d+)?)$this->cls[ .].*)$(?![^#*;:])/smU",
1972              array(&$this, "fTextileList"),
1973              $text
1974          );
1975      }
1976  
1977      /**
1978       * Constructs a HTML list from a Textile list structure.
1979       *
1980       * This method is used by Parser::textileLists() to process
1981       * found list structures.
1982       *
1983       * @param  array  $m
1984       * @return string HTML list
1985       * @see    Parser::textileLists()
1986       */
1987  
1988      protected function fTextileList($m)
1989      {
1990          $text = preg_split('/\n(?=[*#;:])/m', $m[0]);
1991          $pt = '';
1992          foreach ($text as $nr => $line) {
1993              $nextline = isset($text[$nr+1]) ? $text[$nr+1] : false;
1994              if (preg_match("/^(?P<tl>[#*;:]+)(?P<st>_|\d+)?(?P<atts>$this->cls)[ .](?P<content>.*)$/s", $line, $m)) {
1995                  $tl = $m['tl'];
1996                  $st = $m['st'];
1997                  $atts = $m['atts'];
1998                  $content = trim($m['content']);
1999                  $nl = '';
2000                  $ltype = $this->liType($tl);
2001                  $litem = (strpos($tl, ';') !== false) ? 'dt' : ((strpos($tl, ':') !== false) ? 'dd' : 'li');
2002                  $showitem = (strlen($content) > 0);
2003  
2004                  if ('o' === $ltype) {
2005                      // Handle list continuation/start attribute on ordered lists.
2006                      if (!isset($this->olstarts[$tl])) {
2007                          $this->olstarts[$tl] = 1;
2008                      }
2009  
2010                      if (strlen($tl) > strlen($pt)) {
2011                          // First line of this level of ol -- has a start attribute?
2012                          if ('' == $st) {
2013                              // No => reset count to 1.
2014                              $this->olstarts[$tl] = 1;
2015                          } elseif ('_' !== $st) {
2016                              // Yes, and numeric => reset to given.
2017                              // TRICKY: the '_' continuation marker just means
2018                              // output the count so don't need to do anything
2019                              // here.
2020                              $this->olstarts[$tl] = (int) $st;
2021                          }
2022                      }
2023  
2024                      if ((strlen($tl) > strlen($pt)) && '' !== $st) {
2025                          // Output the start attribute if needed.
2026                          $st = ' start="' . $this->olstarts[$tl] . '"';
2027                      }
2028  
2029                      if ($showitem) {
2030                          // TRICKY: Only increment the count for list items;
2031                          // not when a list definition line is encountered.
2032                          $this->olstarts[$tl] += 1;
2033                      }
2034                  }
2035  
2036                  if (preg_match("/^(?P<nextlistitem>[#*;:]+)(_|[\d]+)?($this->cls)[ .].*/", $nextline, $nm)) {
2037                      $nl = $nm['nextlistitem'];
2038                  }
2039  
2040                  if ((strpos($pt, ';') !== false) && (strpos($tl, ':') !== false)) {
2041                      // We're already in a <dl> so flag not to start another
2042                      $lists[$tl] = 2;
2043                  }
2044  
2045                  $tabs = str_repeat("\t", strlen($tl)-1);
2046                  $atts = $this->parseAttribs($atts);
2047                  if (!isset($lists[$tl])) {
2048                      $lists[$tl] = 1;
2049                      $line = "$tabs<" . $ltype . "l$atts$st>" . (($showitem) ? "\n$tabs\t<$litem>" . $content : '');
2050                  } else {
2051                      $line = ($showitem) ? "$tabs\t<$litem$atts>" . $content : '';
2052                  }
2053  
2054                  if ((strlen($nl) <= strlen($tl))) {
2055                      $line .= (($showitem) ? "</$litem>" : '');
2056                  }
2057  
2058                  foreach (array_reverse($lists) as $k => $v) {
2059                      if (strlen($k) > strlen($nl)) {
2060                          $line .= ($v==2) ? '' : "\n$tabs</" . $this->liType($k) . "l>";
2061                          if ((strlen($k) > 1) && ($v != 2)) {
2062                              $line .= "</".$litem.">";
2063                          }
2064                          unset($lists[$k]);
2065                      }
2066                  }
2067                  $pt = $tl; // Remember the current Textile tag
2068              }
2069  
2070              $out[] = $line;
2071          }
2072  
2073          $out = implode("\n", $out);
2074          return $this->doTagBr($litem, $out);
2075      }
2076  
2077      /**
2078       * Determines the list type from the Textile input symbol.
2079       *
2080       * @param  string $in Textile input containing the possible list marker
2081       * @return string Either 'd', 'o', 'u'
2082       */
2083  
2084      protected function liType($in)
2085      {
2086          $m = array();
2087          $type = 'd';
2088          if (preg_match('/^(?P<type>[#*]+)/', $in, $m)) {
2089              $type = ('#' === substr($m['type'], -1)) ? 'o' : 'u';
2090          }
2091          return $type;
2092      }
2093  
2094      /**
2095       * Adds br tags within the specified container tag.
2096       *
2097       * @param  string $tag The tag
2098       * @param  string $in  The input
2099       * @return string
2100       */
2101  
2102      protected function doTagBr($tag, $in)
2103      {
2104          return preg_replace_callback(
2105              '@<(?P<tag>'.preg_quote($tag).')(?P<atts>[^>]*?)>(?P<content>.*)(?P<closetag></\1>)@s',
2106              array(&$this, 'fBr'),
2107              $in
2108          );
2109      }
2110  
2111      /**
2112       * Adds br tags to paragraphs and headings.
2113       *
2114       * @param  string $in The input
2115       * @return string
2116       */
2117  
2118      protected function doPBr($in)
2119      {
2120          return preg_replace_callback(
2121              '@<(?P<tag>p|h[1-6])(?P<atts>[^>]*?)>(?P<content>.*)(?P<closetag></\1>)@s',
2122              array(&$this, 'fPBr'),
2123              $in
2124          );
2125      }
2126  
2127      /**
2128       * Less restrictive version of fBr method.
2129       *
2130       * Used only in paragraphs and headings where the next row may
2131       * start with a smiley or perhaps something like '#8 bolt...'
2132       * or '*** stars...'.
2133       *
2134       * @param  string $m The input
2135       * @return string
2136       */
2137  
2138      protected function fPBr($m)
2139      {
2140          // Replaces <br/>\n instances that are not followed by white-space,
2141          // or at end, with single LF.
2142          $content = preg_replace(
2143              "~<br[ ]*/?>{$this->regex_snippets['space']}*\n(?![{$this->regex_snippets['space']}|])~i",
2144              "\n",
2145              $m['content']
2146          );
2147          // Replaces those LFs that aren't followed by white-space, or at end, with <br />.
2148          $content = preg_replace("/\n(?![\s|])/", '<br />', $content);
2149          return '<'.$m['tag'].$m['atts'].'>'.$content.$m['closetag'];
2150      }
2151  
2152      /**
2153       * Formats line breaks.
2154       *
2155       * @param  string $m The input
2156       * @return string
2157       */
2158  
2159      protected function fBr($m)
2160      {
2161          $content = preg_replace("@(.+)(?<!<br>|<br />|</li>|</dd>|</dt>)\n(?![#*;:\s|])@", '$1<br />', $m['content']);
2162          return '<'.$m['tag'].$m['atts'].'>'.$content.$m['closetag'];
2163      }
2164  
2165      /**
2166       * Splits the given input into blocks.
2167       *
2168       * Blocks are separated by double line-break boundaries, and processed
2169       * the blocks one by one.
2170       *
2171       * @param  string $text Textile source text
2172       * @return string Input text with blocks processed
2173       */
2174  
2175      protected function blocks($text)
2176      {
2177          $regex = '/^(?P<tag>'.join('|', $this->blocktag_whitelist).')'.
2178              '(?P<atts>'.$this->a.$this->cls.')\.(?P<ext>\.?)(?::(?P<cite>\S+))? (?P<graf>.*)$/Ss'.
2179              $this->regex_snippets['mod'];
2180  
2181          $textblocks = preg_split('/(\n{2,})/', $text, null, PREG_SPLIT_DELIM_CAPTURE);
2182  
2183          $eatWhitespace = false;
2184          $whitespace = '';
2185          $ext = '';
2186          $out = array();
2187  
2188          foreach ($textblocks as $block) {
2189  
2190              // Line is just whitespace, keep it for the next block.
2191              if (trim($block) === '') {
2192                  if ($eatWhitespace === false) {
2193                      $whitespace .= $block;
2194                  }
2195                  continue;
2196              }
2197  
2198              if (!$ext) {
2199                  $tag   = 'p';
2200                  $atts  = '';
2201                  $cite  = '';
2202                  $graf  = '';
2203                  $eat   = false;
2204              }
2205  
2206              $eatWhitespace = false;
2207              $anonymous_block = !preg_match($regex, $block, $m);
2208  
2209              if (!$anonymous_block) {
2210                  // Last block was extended, so close it
2211                  if ($ext) {
2212                      $out[count($out)-1] .= $c1;
2213                  }
2214  
2215                  // Extract the new block's parts
2216                  extract($m);
2217                  list($o1, $o2, $content, $c2, $c1, $eat) = $this->fBlock($m);
2218  
2219                  // Leave off c1 if this block is extended, we'll close it at the start of the next block
2220                  $block = $o1.$o2.$content.$c2;
2221                  if (!$ext) {
2222                      $block .= $c1;
2223                  }
2224              } else {
2225                  if ($ext || strpos($block, ' ') !== 0) {
2226                      list($o1, $o2, $content, $c2, $c1, $eat) = $this->fBlock(array(
2227                          0,
2228                          $tag,
2229                          $atts,
2230                          $ext,
2231                          $cite,
2232                          $block,
2233                      ));
2234  
2235                      // Skip $o1/$c1 because this is part of a continuing extended block
2236                      if ($tag == 'p' && !$this->hasRawText($content)) {
2237                          $block = $content;
2238                      } else {
2239                          $block = $o2.$content.$c2;
2240                      }
2241                  } else {
2242                      $block = $this->graf($block);
2243                  }
2244              }
2245  
2246              $block = $this->doPBr($block);
2247              $block = $whitespace. str_replace('<br>', '<br />', $block);
2248  
2249              if ($ext && $anonymous_block) {
2250                  $out[count($out)-1] .= $block;
2251              } elseif (!$eat) {
2252                  $out[] = $block;
2253              }
2254  
2255              if ($eat) {
2256                  $eatWhitespace = true;
2257              } else {
2258                  $whitespace = '';
2259              }
2260  
2261          }
2262  
2263          if ($ext) {
2264              $out[count($out)-1] .= $c1;
2265          }
2266  
2267          return join('', $out);
2268      }
2269  
    /**
     * Formats the given block.
     *
     * Adds block tags and formats the text content inside
     * the block.
     *
     * The returned array holds, in order: the outer opening tag, the
     * inner opening tag, the formatted content, the inner closing tag,
     * the outer closing tag and a flag telling the caller to discard
     * the block.
     *
     * @param  array $m Block parts: index 1 tag, 2 attributes, 3 extended marker, 4 cite, 5 content
     * @return array
     */

    protected function fBlock($m)
    {
        list(, $tag, $att, $ext, $cite, $content) = $m;
        $atts = $this->parseAttribs($att);
        $space = $this->regex_snippets['space'];

        $o1  = '';
        $o2  = '';
        $c2  = '';
        $c1  = '';
        $eat = false;

        if ($tag === 'p') {
            // Is this an anonymous block with a note definition?
            $notedef = preg_replace_callback(
                "/
                    ^note\#                              # start of note def marker
                    (?P<label>[^%<*!@#^([{ {$space}.]+)  # label
                    (?P<link>[*!^]?)                     # link
                    (?P<att>{$this->cls})                # att
                    \.?                                  # optional period.
                    {$space}+                            # whitespace ends def marker
                    (?P<content>.*)$                     # content
                /x".$this->regex_snippets['mod'],
                array(&$this, "fParseNoteDefs"),
                $content
            );

            if ('' === $notedef) {
                // It will be empty if the regex matched and ate it.
                return array($o1, $o2, $notedef, $c2, $c1, true);
            }
        }

        // Footnote blocks (fn followed by digits) are rendered as paragraphs.
        if (preg_match("/fn(?P<fnid>{$this->regex_snippets['digit']}+)/".$this->regex_snippets['mod'], $tag, $fns)) {
            $tag = 'p';
            // Reuse the id stored for this footnote number, or generate a new one.
            $fnid = empty($this->fn[$fns['fnid']]) ? $this->linkPrefix . ($this->linkIndex++) : $this->fn[$fns['fnid']];

            // If there is an author-specified ID goes on the wrapper & the auto-id gets pushed to the <sup>
            $supp_id = '';
            if (strpos($atts, 'class=') === false) {
                $atts .= ' class="footnote"';
            }

            if (strpos($atts, ' id=') === false) {
                $atts .= ' id="fn' . $fnid . '"';
            } else {
                $supp_id = ' id="fn' . $fnid . '"';
            }

            // A '^' in the raw attributes turns the marker into a link to '#fnrev<id>'.
            if (strpos($att, '^') === false) {
                $sup = $this->formatFootnote($fns['fnid'], $supp_id);
            } else {
                $sup = $this->formatFootnote('<a href="#fnrev' . $fnid . '">'.$fns['fnid'] .'</a>', $supp_id);
            }

            $content = $sup . ' ' . $content;
        }

        if ($tag == "bq") {
            // Blockquote: outer <blockquote>, inner <p>.
            $cite = $this->shelveURL($cite);
            $cite = ($cite != '') ? ' cite="' . $cite . '"' : '';
            $o1 = "<blockquote$cite$atts>\n";
            $o2 = "\t<p".$this->parseAttribs($att, '', 0).">";
            $c2 = "</p>";
            $c1 = "\n</blockquote>";
        } elseif ($tag == 'bc') {
            // Block code: content is HTML-encoded and shelved.
            $o1 = "<pre$atts><code>";
            $c1 = "</code></pre>";
            $content = $this->shelve($this->rEncodeHTML($content));
        } elseif ($tag == 'notextile') {
            // Content is shelved as-is and gets no wrapping tags.
            $content = $this->shelve($content);
            $o1 = '';
            $o2 = '';
            $c1 = '';
            $c2 = '';
        } elseif ($tag == 'pre') {
            // Preformatted: content is HTML-encoded and shelved.
            $content = $this->shelve($this->rEncodeHTML($content));
            $o1 = "<pre$atts>";
            $o2 = '';
            $c2 = '';
            $c1 = "</pre>";
        } elseif ($tag == '###') {
            // Textile comment block: flag it so the content is discarded.
            $eat = true;
        } else {
            // Any other tag wraps the content directly.
            $o2 = "<$tag$atts>";
            $c2 = "</$tag>";
        }

        $content = (!$eat) ? $this->graf($content) : '';

        return array($o1, $o2, $content, $c2, $c1, $eat);
    }
2373  
2374      /**
2375       * Formats a footnote.
2376       *
2377       * @param  string $marker The marker
2378       * @param  string $atts   Attributes
2379       * @param  bool   $anchor TRUE, if its a reference link
2380       * @return string Processed footnote
2381       */
2382  
2383      protected function formatFootnote($marker, $atts = '', $anchor = true)
2384      {
2385          $pattern = ($anchor) ? $this->symbols['fn_foot_pattern'] : $this->symbols['fn_ref_pattern'];
2386          return $this->replaceMarkers($pattern, array('atts' => $atts, 'marker' => $marker));
2387      }
2388  
2389      /**
2390       * Replaces markers with replacements in the given input.
2391       *
2392       * @param  string $text         The input
2393       * @param  array  $replacements Marker replacement pairs
2394       * @return string
2395       */
2396  
2397      protected function replaceMarkers($text, $replacements)
2398      {
2399          if (!empty($replacements)) {
2400              foreach ($replacements as $k => $r) {
2401                  $text = str_replace('{'.$k.'}', $r, $text);
2402              }
2403          }
2404          return $text;
2405      }
2406  
2407      /**
2408       * Parses HTML comments in the given input.
2409       *
2410       * This method finds HTML comments in the given input
2411       * and replaces them with reference tokens.
2412       *
2413       * @param  string $text Textile input
2414       * @return string $text Processed input
2415       */
2416  
2417      protected function getHTMLComments($text)
2418      {
2419          $text = preg_replace_callback(
2420              "/\<!--(?P<content>.*?)-->/sx",
2421              array(&$this, "fParseHTMLComments"),
2422              $text
2423          );
2424          return $text;
2425      }
2426  
2427      /**
2428       * Formats a HTML comment.
2429       *
2430       * Stores the comment on the shelf and returns
2431       * a reference token wrapped in to a HTML comment.
2432       *
2433       * @param  array  $m Options
2434       * @return string Reference token wrapped to a HTML comment tags
2435       */
2436  
2437      protected function fParseHTMLComments($m)
2438      {
2439          return '<!--'.$this->shelve($m['content']).'-->';
2440      }
2441  
    /**
     * Parses paragraphs in the given input.
     *
     * Runs the input through the processing steps below, in order.
     * The steps guarded by the lite flag (notextile, code, tables,
     * lists, footnote and note references) are skipped in lite mode,
     * and images are skipped when the noimage flag is set.
     *
     * @param  string $text Textile input
     * @return string Processed input, with trailing line feeds removed
     */

    protected function graf($text)
    {
        // Handle normal paragraph text
        if (!$this->lite) {
            // Notextile blocks and inlines
            $text = $this->noTextile($text);
            // Handle code
            $text = $this->code($text);
        }

        // HTML comments --
        $text = $this->getHTMLComments($text);
        // Consume link aliases
        $text = $this->getRefs($text);
        // Treat quoted quote as a special glyph.
        $text = $this->glyphQuotedQuote($text);
        // Generate links
        $text = $this->links($text);

        // Handle images (if permitted)
        if (!$this->noimage) {
            $text = $this->images($text);
        }

        if (!$this->lite) {
            // Handle tables
            $text = $this->tables($text);
            // Handle redcloth-style definition lists
            $text = $this->redclothLists($text);
            // Handle ordered & unordered lists plus txp-style definition lists
            $text = $this->textileLists($text);
        }

        // Inline markup (em, strong, sup, sub, del etc)
        $text = $this->spans($text);

        if (!$this->lite) {
            // Turn footnote references into supers or links.
            // As footnote blocks are banned in lite mode there is no point
            // generating links for them.
            $text = $this->footnoteRefs($text);

            // Turn note references into links
            $text = $this->noteRefs($text);
        }

        // Glyph level substitutions (mainly typographic -- " & ' => curly quotes, -- => em-dash etc.
        $text = $this->glyphs($text);

        return rtrim($text, "\n");
    }
2500  
    /**
     * Replaces Textile span tags with their equivalent HTML inline tags.
     *
     * Each span tag known to the parser is searched for in turn and
     * every match is passed to Parser::fSpan(). As fSpan() calls this
     * method again on the span's content, a depth counter is kept;
     * once it exceeds the configured maximum the text is returned
     * unchanged.
     *
     * @param  string $text The Textile document to perform the replacements in
     * @return string The Textile document with spans replaced by their HTML inline equivalents
     * @see    Parser::fSpan()
     */

    protected function spans($text)
    {
        $span_tags = array_keys($this->span_tags);
        // Punctuation characters allowed directly around a span.
        $pnct = ".,\"'?!;:‹›«»„“”‚‘’";
        $this->span_depth++;

        if ($this->span_depth <= $this->max_span_depth) {
            foreach ($span_tags as $tag) {
                // The tag is interpolated into the pattern below, so escape it first.
                $tag = preg_quote($tag);
                $text = preg_replace_callback(
                    "/
                    (?P<pre>^|(?<=[\s>$pnct\(])|[{[])
                    (?P<tag>$tag)(?!$tag)
                    (?P<atts>{$this->cls})
                    (?!$tag)
                    (?::(?P<cite>\S+[^$tag]{$this->regex_snippets['space']}))?
                    (?P<content>[^{$this->regex_snippets['space']}$tag]+|\S.*?[^\s$tag\n])
                    (?P<end>[$pnct]*)
                    $tag
                    (?P<tail>$|[\[\]}<]|(?=[$pnct]{1,2}[^0-9]|\s|\)))
                    /x".$this->regex_snippets['mod'],
                    array(&$this, "fSpan"),
                    $text
                );
            }
        }
        // Leaving this nesting level.
        $this->span_depth--;
        return $text;
    }
2537  
2538      /**
2539       * Formats a span tag and stores it on the shelf.
2540       *
2541       * @param  array  $m Options
2542       * @return string Content wrapped to reference tokens
2543       * @see    Parser::spans()
2544       */
2545  
2546      protected function fSpan($m)
2547      {
2548          $tag = $this->span_tags[$m['tag']];
2549          $atts = $this->parseAttribsToArray($m['atts']);
2550  
2551          if ($m['cite'] != '') {
2552              $atts['cite'] = trim($m['cite']);
2553              ksort($atts);
2554          }
2555  
2556          $atts = $this->formatAttributeString($atts);
2557          $content = $this->spans($m['content']);
2558          $opentag = '<'.$tag.$atts.'>';
2559          $closetag = '</'.$tag.'>';
2560          $tags = $this->storeTags($opentag, $closetag);
2561          $out = "{$tags['open']}{$content}{$m['end']}{$tags['close']}";
2562  
2563          if (($m['pre'] && !$m['tail']) || ($m['tail'] && !$m['pre'])) {
2564              $out = $m['pre'].$out.$m['tail'];
2565          }
2566  
2567          return $out;
2568      }
2569  
2570      /**
2571       * Stores a tag pair in the tag cache.
2572       *
2573       * @param  string $opentag  Opening tag
2574       * @param  string $closetag Closing tag
2575       * @return array  Reference tokens for both opening and closing tag
2576       */
2577  
2578      protected function storeTags($opentag, $closetag = '')
2579      {
2580          $tags = array();
2581  
2582          $this->refCache[$this->refIndex] = $opentag;
2583          $tags['open'] = $this->uid.$this->refIndex.':ospan ';
2584          $this->refIndex++;
2585  
2586          $this->refCache[$this->refIndex] = $closetag;
2587          $tags['close'] = ' '.$this->uid.$this->refIndex.':cspan';
2588          $this->refIndex++;
2589  
2590          return $tags;
2591      }
2592  
2593      /**
2594       * Replaces reference tokens with corresponding shelved span tags.
2595       *
2596       * This method puts all shelved span tags back to the final,
2597       * parsed input.
2598       *
2599       * @param  string $text The input
2600       * @return string Processed text
2601       * @see    Parser::storeTags()
2602       */
2603  
2604      protected function retrieveTags($text)
2605      {
2606          $text = preg_replace_callback(
2607              '/'.$this->uid.'(?P<token>[0-9]+):ospan /',
2608              array(&$this, 'fRetrieveTags'),
2609              $text
2610          );
2611  
2612          $text = preg_replace_callback(
2613              '/ '.$this->uid.'(?P<token>[0-9]+):cspan/',
2614              array(&$this, 'fRetrieveTags'),
2615              $text
2616          );
2617  
2618          return $text;
2619      }
2620  
2621      /**
2622       * Retrieves a tag from the tag cache.
2623       *
2624       * @param  array $m Options
2625       * @return string
2626       * @see    Parser::retrieveTags()
2627       */
2628  
2629      protected function fRetrieveTags($m)
2630      {
2631          return $this->refCache[$m['token']];
2632      }
2633  
2634      /**
2635       * Parses note lists in the given input.
2636       *
2637       * This method should be ran after other blocks
2638       * have been processed, but before reference tokens
2639       * have been replaced with their replacements.
2640       *
2641       * @param  string $text Textile input
2642       * @return string Processed input
2643       */
2644  
2645      protected function placeNoteLists($text)
2646      {
2647          extract($this->regex_snippets);
2648  
2649          // Sequence all referenced definitions...
2650          if (!empty($this->notes)) {
2651              $o = array();
2652              foreach ($this->notes as $label => $info) {
2653                  if (!empty($info['seq'])) {
2654                      $o[$info['seq']] = $info;
2655                      $info['seq'] = $label;
2656                  } else {
2657                      $this->unreferencedNotes[] = $info;    // Unreferenced definitions go here for possible future use.
2658                  }
2659              }
2660  
2661              if (!empty($o)) {
2662                  ksort($o);
2663              }
2664  
2665              $this->notes = $o;
2666          }
2667  
2668          // Replace list markers.
2669          $text = preg_replace_callback(
2670              "@<p>notelist(?P<atts>{$this->c})".
2671              "(?:\:(?P<startchar>[$wrd|{$this->syms}]))?".
2672              "(?P<links>[\^!]?)(?P<extras>\+?)\.?$space*</p>@U$mod",
2673              array(&$this, "fNoteLists"),
2674              $text
2675          );
2676  
2677          return $text;
2678      }
2679  
    /**
     * Formats a note list.
     *
     * Builds the list items for the sequenced notes, followed by the
     * unreferenced ones when the '+' extra is given, and wraps them
     * in an ordered list. The items are cached per combination of
     * links, extras and start character.
     *
     * @param  array  $m Matches with the keys 'atts', 'startchar', 'links' and 'extras'
     * @return string Processed note list, or an empty string if there are no items
     * @see    Parser::placeNoteLists()
     */

    protected function fNoteLists($m)
    {
        // Fall back to 'a' when no start character was given.
        if (!$m['startchar']) {
            $m['startchar'] = 'a';
        }

        // Cache key for this combination of options.
        $index = $m['links'].$m['extras'].$m['startchar'];

        if (empty($this->notelist_cache[$index])) {
            // If not in cache, build the entry...
            $out = array();

            if (!empty($this->notes)) {
                foreach ($this->notes as $seq => $info) {
                    $links = $this->makeBackrefLink($info, $m['links'], $m['startchar']);
                    $atts = '';
                    if (!empty($info['def'])) {
                        $id = $info['id'];
                        // Brings the definition's entries into local scope; the line
                        // below presumably relies on it for $atts and $content -- verify
                        // against where the definitions are stored.
                        extract($info['def']);
                        $out[] = "\t".'<li'.$atts.'>'.$links.'<span id="note'.$id.'"> </span>'.$content.'</li>';
                    } else {
                        // Referenced, but never defined.
                        $out[] = "\t".'<li'.$atts.'>'.$links.' Undefined Note [#'.$info['seq'].'].</li>';
                    }
                }
            }

            // The '+' extra appends the unreferenced definitions as well.
            if ('+' == $m['extras'] && !empty($this->unreferencedNotes)) {
                foreach ($this->unreferencedNotes as $seq => $info) {
                    if (!empty($info['def'])) {
                        extract($info['def']);
                        $out[] = "\t".'<li'.$atts.'>'.$content.'</li>';
                    }
                }
            }

            $this->notelist_cache[$index] = join("\n", $out);
        }

        // Only emit a list when there is at least one item.
        if ($this->notelist_cache[$index]) {
            $atts = $this->parseAttribs($m['atts']);
            return "<ol$atts>\n{$this->notelist_cache[$index]}\n</ol>";
        }

        return '';
    }
2732  
2733      /**
2734       * Renders a note back reference link.
2735       *
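2736       * This method renders the back reference links for a
2737       * single note and returns them as one string.
2738       *
2739       * @param  array  $info    Note data, including its reference ids
2740       * @param  string $g_links Backlink type requested by the note list
2741       * @param  string $i       Character the backlink labels start from
2742       * @return string Rendered backlinks, or an empty string if disabled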
2743       */
2744  
2745      protected function makeBackrefLink(&$info, $g_links, $i)
2746      {
2747          // A link flag stored with the definition overrides the list-wide one.
2748          $own = '';
2749          if (!empty($info['def']) && isset($info['def']['link'])) {
2750              $own = $info['def']['link'];
2751          }
2752          $type = $own ? $own : $g_links;
2753  
2754          // Start characters found in $this->syms are repeated, never incremented.
2755          $countsUp = (false === strpos($this->syms, $i));
2756          // Work on encodeHigh()'s output with "&", ";" and "#" removed; when that
2757          // changed the length, labels go back through decodeHigh() for display.
2758          $label   = str_replace(array('&', ';', '#'), '', $this->encodeHigh($i));
2759          $encoded = (strlen($i) !== strlen($label));
2760  
2761          if ('!' === $type) {
2762              return '';
2763          }
2764          if ('^' === $type) {
2765              return '<sup><a href="#noteref'.$info['refids'][0].'">'.$i.'</a></sup>';
2766          }
2767  
2768          $anchors = array();
2769          foreach ($info['refids'] as $refid) {
2770              $shown     = $encoded ? $this->decodeHigh($label) : $label;
2771              $anchors[] = '<sup><a href="#noteref'.$refid.'">'.$shown.'</a></sup>';
2772              if ($countsUp) {
2773                  $label++;
2774              }
2775          }
2776  
2777          return join(' ', $anchors);
2778      }
2779  
2780      /**
2781       * Formats note definitions.
2782       *
2783       * This method formats notes and stores them in
2784       * note cache for later use and to build reference
2785       * links.
2786       *
2787       * @param  array  $m Options
2788       * @return string Empty string
2789       */
2790  
2791      protected function fParseNoteDefs($m)
2792      {
2793          $label   = $m['label'];
2794          $flag    = $m['link'];
2795          $rawAtts = $m['att'];
2796          $body    = $m['content'];
2797  
2798          // The reference pass may not have met this label yet; give it an id now.
2799          if (empty($this->notes[$label]['id'])) {
2800              $this->notes[$label]['id'] = $this->linkPrefix . ($this->linkIndex++);
2801          }
2802  
2803          // Only the first definition of a label is kept; repeats are discarded.
2804          if (!empty($this->notes[$label]['def'])) {
2805              return '';
2806          }
2807  
2808          $atts    = $this->parseAttribs($rawAtts);
2809          $content = $this->graf($body);
2810          $this->notes[$label]['def'] = array('atts' => $atts, 'content' => $content, 'link' => $flag);
2811          return '';
2812      }
2813  
2814      /**
2815       * Parses note references in the given input.
2816       *
2817       * This method replaces note reference tags with
2818       * links.
2819       *
2820       * @param  string $text Textile input
2821       * @return string
2822       */
2823  
2824      protected function noteRefs($text)
2825      {
2826          // Matches "[#label]": optional attributes sit before the "#", and an
2827          // optional "!" right before the closing bracket sets the nolink flag.
2828          $pattern = "/\[(?P<atts>{$this->c})\#(?P<label>[^\]!]+?)(?P<nolink>[!]?)\]/Ux";
2829  
2830          // Each match is rewritten by fParseNoteRefs().
2831          return preg_replace_callback($pattern, array(&$this, "fParseNoteRefs"), $text);
2832      }
2833  
2834      /**
2835       * Formats note reference links.
2836       *
2837       * By the time this function is called, all note lists will have been
2838       * processed into the notes array, and we can resolve the link numbers in
2839       * the order we process the references.
2840       *
2841       * @param  array  $m Options
2842       * @return string Note reference
2843       */
2844  
2845      protected function fParseNoteRefs($m)
2846      {
2847          $label = $m['label'];
2848          $atts  = $this->parseAttribs($m['atts']);
2849  
2850          // First sighting of this label: hand out the next sequence number.
2851          if (empty($this->notes[$label]['seq'])) {
2852              $this->notes[$label]['seq'] = $this->note_index++;
2853          }
2854          $num = $this->notes[$label]['seq'];
2855  
2856          // Every reference gets its own anchor id, remembered so the note list
2857          // can link back to it later.
2858          $refid = $this->linkPrefix . ($this->linkIndex++);
2859          $this->notes[$label]['refids'][] = $refid;
2860  
2861          // The note needs an id as well if its definition has not been parsed yet.
2862          if (empty($this->notes[$label]['id'])) {
2863              $this->notes[$label]['id'] = $this->linkPrefix . ($this->linkIndex++);
2864          }
2865          $id = $this->notes[$label]['id'];
2866  
2867          // The visible marker is the sequence number inside an anchor span...
2868          $marker = '<span id="noteref'.$refid.'">'.$num.'</span>';
2869  
2870          // ...wrapped in a link to the note unless the reference ended in "!".
2871          if ('!' !== $m['nolink']) {
2872              $marker = '<a href="#note'.$id.'">'.$marker.'</a>';
2873          }
2874  
2875          // Drop attributes and marker into the configured reference template.
2876          return $this->replaceMarkers(
2877              $this->symbols['nl_ref_pattern'],
2878              array('atts' => $atts, 'marker' => $marker)
2879          );
2880      }
2881  
2882      /**
2883       * Parses URI into component parts.
2884       *
2885       * This method splits a URI-like string apart into component parts, while
2886       * also providing validation.
2887       *
2888       * @param  string $uri The string to pick apart (if possible)
2889       * @param  array  $m   Reference to an array the URI component parts are assigned to
2890       * @return bool   TRUE if the string validates as a URI
2891       * @link   http://tools.ietf.org/html/rfc3986#appendix-B
2892       */
2893  
2894      protected function parseURI($uri, &$m)
2895      {
2896          // One named group per RFC 3986 component; each may be empty or missing from $m.
2897          $pattern = '@^((?P<scheme>[^:/?#]+):)?'
2898              .'(//(?P<authority>[^/?#]*))?'
2899              .'(?P<path>[^?#]*)'
2900              .'(\?(?P<query>[^#]*))?'
2901              .'(#(?P<fragment>.*))?@';
2902  
2903          return preg_match($pattern, $uri, $m);
2904      }
2905  
2906      /**
2907       * Checks whether a component part can be added to a URI.
2908       *
2909       * @param  array  $mask  An array of allowed component parts
2910       * @param  string $name  The component to add
2911       * @param  array  $parts An array of existing components to modify
2912       * @return bool   TRUE if the component can be added
2913       */
2914  
2915      protected function addPart(&$mask, $name, &$parts)
2916      {
2917          return in_array($name, $mask) ? (isset($parts[$name]) && '' !== $parts[$name]) : false;
2918      }
2919  
2920      /**
2921       * Rebuild a URI from parsed parts and a mask.
2922       *
2923       * @param  array  $parts  Full array of URI parts
2924       * @param  string $mask   Comma separated list of URI parts to include in the rebuilt URI
2925       * @param  bool   $encode Flag to control encoding of the path part of the rebuilt URI
2926       * @return string         The rebuilt URI
2927       * @link   http://tools.ietf.org/html/rfc3986#section-5.3
2928       */
2929  
2930      protected function rebuildURI($parts, $mask = 'scheme,authority,path,query,fragment', $encode = true)
2931      {
2932          $mask = explode(',', $mask);
2933          $out  = '';
2934  
2935          if ($this->addPart($mask, 'scheme', $parts)) {
2936              $out .= $parts['scheme'] . ':';
2937          }
2938  
2939          if ($this->addPart($mask, 'authority', $parts)) {
2940              $out .= '//' . $parts['authority'];
2941          }
2942  
2943          if ($this->addPart($mask, 'path', $parts)) {
2944              if (!$encode) {
2945                  $out .= $parts['path'];
2946              } else {
2947                  // "%2B" is turned back into "+" except in tel: and mailto: URIs. Decided
2948                  // once per call, and tolerant of a $parts array that has no scheme key.
2949                  $scheme   = isset($parts['scheme']) ? $parts['scheme'] : '';
2950                  $keepPlus = !in_array($scheme, array('tel', 'mailto'), true);
2951                  $segments = explode('/', $parts['path']);
2952                  foreach ($segments as $k => $segment) {
2953                      $segment      = str_replace(array('%25', '%40'), array('%', '@'), rawurlencode($segment));
2954                      $segments[$k] = $keepPlus ? str_replace('%2B', '+', $segment) : $segment;
2955                  }
2956                  $out .= implode('/', $segments);
2957              }
2958          }
2959  
2960          if ($this->addPart($mask, 'query', $parts)) {
2961              $out .= '?' . $parts['query'];
2962          }
2963  
2964          if ($this->addPart($mask, 'fragment', $parts)) {
2965              $out .= '#' . $parts['fragment'];
2966          }
2967  
2968          return $out;
2969      }
2970  
2971      /**
2972       * Parses and shelves links in the given input.
2973       *
2974       * This method parses the input Textile document for links.
2975       * Formats and encodes them, and stores the created link
2976       * elements in cache.
2977       *
2978       * @param  string $text Textile input
2979       * @return string The input document with link pulled out and replaced with tokens
2980       */
2981  
2982      protected function links($text)
2983      {
2984          // Two passes: tag where each link begins, then swap links for tokens.
2985          return $this->replaceLinks($this->markStartOfLinks($text));
2986      }
2987  
2988      /**
2989       * Finds and marks the start of well formed links in the input text.
2990       *
2991       * @param  string $text String to search for link starting positions
2992       * @return string Text with links marked
2993       * @see    Parser::links()
2994       */
2995  
2996      protected function markStartOfLinks($text)
2997      {
          // Returns $text with "<uid>linkStartMarker:" inserted before the quote
          // judged to open each inline link, so that replaceLinks() can find it.
          //
2998          // Slice text on '":<not space>' boundaries. These always occur in inline
2999          // links between the link text and the url part and are much more
3000          // infrequent than '"' characters so we have less possible links
3001          // to process.
3002          $mod = $this->regex_snippets['mod'];
3003          $slices = preg_split('/":(?='.$this->regex_snippets['char'].')/'.$mod, $text);
3004  
3005          if (count($slices) > 1) {
3006  
3007              // There are never any start of links in the last slice, so pop it
3008              // off (we'll glue it back later).
3009              $last_slice = array_pop($slices);
3010  
              // Iterate by reference: each slice is rewritten in place.
3011              foreach ($slices as &$slice) {
3012  
3013                  // If there is no possible start quote then this slice is not a link
3014                  if (false === strpos($slice, '"')) {
3015                      continue;
3016                  }
3017  
3018                  // Cut this slice into possible starting points wherever we
3019                  // find a '"' character. Any of these parts could represent
3020                  // the start of the link text - we have to find which one.
3021                  $possible_start_quotes = explode('"', $slice);
3022  
3023                  // Start our search for the start of the link with the closest prior
3024                  // quote mark.
3025                  $possibility = rtrim(array_pop($possible_start_quotes));
3026  
3027                  // Init the balanced count. If this is still zero at the end
3028                  // of our do loop we'll mark the " that caused it to balance
3029                  // as the start of the link and move on to the next slice.
3030                  $balanced = 0;
3031                  $linkparts = array();
3032                  $iter = 0;
3033  
                  // array_pop() yields null once no parts are left, which ends the loop.
3034                  while (null !== $possibility) {
3035                      // Starting at the end, pop off the previous part of the
3036                      // slice's fragments.
3037  
3038                      // Add this part to those parts that make up the link text.
3039                      $linkparts[] = $possibility;
                      // Despite its name, $len is a boolean: true when this part is non-empty.
3040                      $len = strlen($possibility) > 0;
3041  
3042                      if ($len) {
3043                          // did this part inc or dec the balanced count?
                          // Decrement when the part starts with a non-space character or ends in "=".
3044                          if (preg_match('/^\S|=$/'.$mod, $possibility)) {
3045                              $balanced--;
3046                          }
3047  
                          // Increment when the part ends with a non-space character.
3048                          if (preg_match('/\S$/'.$mod, $possibility)) {
3049                              $balanced++;
3050                          }
3051  
3052                          $possibility = array_pop($possible_start_quotes);
3053                      } else {
3054                          // If quotes occur next to each other, we get zero length strings.
3055                          // eg. ...""Open the door, HAL!"":url...
3056                          // In this case we count a zero length in the last position as a
3057                          // closing quote and others as opening quotes.
3058                          $balanced = (!$iter++) ? $balanced+1 : $balanced-1;
3059  
3060                          $possibility = array_pop($possible_start_quotes);
3061  
3062                          // If out of possible starting segments we back the last one
3063                          // from the linkparts array
3064                          if (null === $possibility) {
3065                              array_pop($linkparts);
3066                              break;
3067                          }
3068  
3069                          // If the next possibility is empty or ends in a space we have a
3070                          // closing ".
3071                          if (0 === strlen($possibility) ||
3072                              preg_match("~{$this->regex_snippets['space']}$~".$mod, $possibility)) {
3073                              $balanced = 0; // force search exit
3074                          }
3075                      }
3076  
                      // Balanced (or over-closed): the part just popped belongs to the text
                      // before the link, so hand it back and stop searching.
3077                      if ($balanced <= 0) {
3078                          array_push($possible_start_quotes, $possibility);
3079                          break;
3080                      }
3081  
3082                  }
3083  
3084                  // Rebuild the link's text by reversing the parts and sticking them back
3085                  // together with quotes.
3086                  $link_content = implode('"', array_reverse($linkparts));
3087  
3088                  // Rebuild the remaining stuff that goes before the link but that's
3089                  // already in order.
3090                  $pre_link = implode('"', $possible_start_quotes);
3091  
3092                  // Re-assemble the link starts with a specific marker for the next regex.
3093                  $slice = $pre_link . $this->uid.'linkStartMarker:"' . $link_content;
3094              }
              // NOTE(review): $slice is still bound by reference to the last element
              // here; no unset($slice) follows the loop.
3095  
3096              // Add the last part back
3097              $slices[] = $last_slice;
3098          }
3099  
3100          // Re-assemble the full text with the start and end markers
3101          $text = implode('":', $slices);
3102  
3103          return $text;
3104      }
3105  
3106      /**
3107       * Replaces links with tokens and stores them on the shelf.
3108       *
3109       * @param  string $text The input
3110       * @return string Processed input
3111       * @see    Parser::links()
3112       */
3113  
3114      protected function replaceLinks($text)
3115      {
3116          // Characters that end the URL part of a link (interpolated into a class).
3117          $stopchars = "\s|^'\"*";
3118          $pattern =
3119              '/
3120              (?P<pre>\[)?                    # Optionally open with a square bracket eg. Look ["here":url]
3121              '.$this->uid.'linkStartMarker:" # marks start of the link
3122              (?P<inner>.*?)                  # grab the content of the inner "..." part of the link, can be anything but
3123                                              # do not worry about matching class, id, lang or title yet
3124              ":                              # literal ": marks end of atts + text + title block
3125              (?P<urlx>[^'.$stopchars.']*)    # url upto a stopchar
3126              /x'.$this->regex_snippets['mod'];
3127  
3128          // Every match is formatted and shelved by fLink().
3129          return preg_replace_callback($pattern, array(&$this, "fLink"), $text);
3130      }
3131  
3132      /**
3133       * Formats a link and stores it on the shelf.
3134       *
3135       * @param  array  $m Options
3136       * @return string Reference token for the shelved content
3137       * @see    Parser::replaceLinks()
3138       */
3139  
3140      protected function fLink($m)
3141      {
          // Callback for replaceLinks(): splits the matched link into attributes,
          // text, title and URL, trims trailing characters that do not belong to
          // the URL, and returns a shelved <a> token plus whatever was trimmed.
3142          $in    = $m[0];
3143          $pre   = $m['pre'];
3144          $inner = $m['inner'];
3145          $url   = $m['urlx'];
3146          $m = array();
3147  
3148          // Treat empty inner part as an invalid link.
          // NOTE(review): empty() is also true for the string "0", so a link whose
          // text is just 0 is returned as plain text - confirm that is intended.
3149          $trimmed = trim($inner);
3150          if (empty($trimmed)) {
3151              return $pre.'"'.$inner.'":'.$url;
3152          }
3153  
3154          // Split inner into $atts, $text and $title..
3155          preg_match(
3156              '/
3157              ^
3158              (?P<atts>' . $this->cls . ')            # $atts (if any)
3159              ' . $this->regex_snippets['space'] . '* # any optional spaces
3160              (?P<text>                               # $text is...
3161                  (!.+!)                              #     an image
3162              |                                       #   else...
3163                  .+?                                 #     link text
3164              )                                       # end of $text
3165              (?:\((?P<title>[^)]+?)\))?              # $title (if any)
3166              $
3167              /x'.$this->regex_snippets['mod'],
3168              $inner,
3169              $m
3170          );
          // Fall back to the raw inner text when a group did not match.
3171          $atts  = isset($m['atts'])  ? $m['atts']  : '';
3172          $text  = isset($m['text'])  ? trim($m['text'])  : $inner;
3173          $title = isset($m['title']) ? $m['title'] : '';
3174          $m = array();
3175  
          // $pop and $tight collect text that is cut off the end of the url and
          // re-emitted after the link.
3176          $pop = $tight = '';
3177          $url_chars = array();
3178          $counts = array(
3179              '['  => null,
3180              ']'  => substr_count($url, ']'), # We need to know how many closing square brackets we have
3181              '('  => null,
3182              ')'  => null,
3183          );
3184  
3185          // Look for footnotes or other square-bracket delimited stuff at the end of the url...
3186          // eg. "text":url][otherstuff... will have "[otherstuff" popped back out.
3187          //     "text":url?q[]=x][123]    will have "[123]" popped off the back, the remaining closing square brackets
3188          //                               will later be tested for balance
3189          if ($counts[']']) {
3190              if (1 === preg_match('@(?P<url>^.*\])(?P<tight>\[.*?)$@' . $this->regex_snippets['mod'], $url, $m)) {
3191                  $url         = $m['url'];
3192                  $tight       = $m['tight'];
3193                  $m = array();
3194              }
3195          }
3196  
3197          // Split off any trailing text that isn't part of an array assignment.
3198          // eg. "text":...?q[]=value1&q[]=value2 ... is ok
3199          // "text":...?q[]=value1]following  ... would have "following"
3200          // popped back out and the remaining square bracket
3201          // will later be tested for balance
3202          if ($counts[']']) {
3203              if (1 === preg_match('@(?P<url>^.*\])(?!=)(?P<end>.*?)$@' . $this->regex_snippets['mod'], $url, $m)) {
3204                  $url         = $m['url'];
3205                  $tight       = $m['end'] . $tight;
3206                  $m = array();
3207              }
3208          }
3209  
3210          // Byte-wise handling: strlen() and str_split() are not multi-byte aware, but
3211          // only ASCII characters are tested for in the loop below.
3212          // This is going to allow us to pop off any non-matched or nonsense chars from the url
          // NOTE(review): $len is assigned here but not read again in this method.
3213          $len = strlen($url);
3214          $url_chars = str_split($url);
3215  
3216          // Now that the url is split into single bytes we walk it backwards
3217          // and pop off
3218          // any chars that don't belong there (like . or , or unmatched brackets of various kinds).
          // $first is true only on the first pass, i.e. for the last character of the url.
3219          $first = true;
3220          do {
3221              $c = array_pop($url_chars);
3222              $popped = false;
3223              switch ($c) {
3224  
3225                  // Textile URL shouldn't end in these characters, we pop
3226                  // them off the end and push them out the back of the url again.
3227                  case '!':
3228                  case '?':
3229                  case ':':
3230                  case ';':
3231                  case '.':
3232                  case ',':
3233                      $pop = $c . $pop;
3234                      $popped = true;
3235                      break;
3236  
3237                  case '>':
                      // A trailing "</tag>" is moved out of the url as a whole.
3238                      $urlLeft = implode('', $url_chars);
3239  
3240                      if (preg_match('@(?P<tag><\/[a-z]+)$@', $urlLeft, $m)) {
3241                          $url_chars = str_split(substr($urlLeft, 0, strlen($m['tag']) * -1));
3242                          $pop = $m['tag'] . $c . $pop;
3243                          $popped = true;
3244                      }
3245  
3246                      break;
3247  
3248                  case ']':
3249                      // If we find a closing square bracket we are going to see if it is balanced.
3250                      // If it is balanced with matching opening bracket then it is part of the URL
3251                      // else we spit it back out of the URL.
3252                      if (null === $counts['[']) {
3253                          $counts['['] = substr_count($url, '[');
3254                      }
3255  
3256                      if ($counts['['] === $counts[']']) {
3257                          // It is balanced, so keep it
3258                          $url_chars[] = $c;
3259                      } else {
3260                          // In the case of un-matched closing square brackets we just eat it
3261                          $popped = true;
3262                          $counts[']'] -= 1;
                          // When it was the url's final character, the optional leading "["
                          // captured in $pre is dropped too (presumably its partner).
3263                          if ($first) {
3264                              $pre = '';
3265                          }
3266                      }
3267                      break;
3268  
3269                  case ')':
3270                      if (null === $counts[')']) {
3271                          $counts['('] = substr_count($url, '(');
3272                          $counts[')'] = substr_count($url, ')');
3273                      }
3274  
3275                      if ($counts['('] === $counts[')']) {
3276                          // It is balanced, so keep it
3277                          $url_chars[] = $c;
3278                      } else {
3279                          // Unbalanced so spit it out the back end
3280                          $pop = $c . $pop;
3281                          $counts[')'] -= 1;
3282                          $popped = true;
3283                      }
3284                      break;
3285  
3286                  default:
3287                      // We have an acceptable character for the end of the url so put it back and
3288                      // exit the character popping loop
3289                      $url_chars[] = $c;
3290                      break;
3291              }
3292              $first = false;
3293          } while ($popped);
3294  
3295          $url = implode('', $url_chars);
3296          $uri_parts = array();
3297          $this->parseURI($url, $uri_parts);
3298  
3299          $scheme         = $uri_parts['scheme'];
3300          $scheme_in_list = in_array($scheme, $this->url_schemes);
3301          $scheme_ok      = ('' === $scheme) || $scheme_in_list;
3302  
          // Unlisted scheme: give back the original match with only the start marker removed.
3303          if (!$scheme_ok) {
3304              return str_replace($this->uid.'linkStartMarker:', '', $in);
3305          }
3306  
          // "$" as link text stands for the address itself: the scheme-less URI for
          // listed schemes, otherwise the url (resolved through urlrefs when it is an alias).
3307          if ('$' === $text) {
3308              if ($scheme_in_list) {
3309                  $text = ltrim($this->rebuildURI($uri_parts, 'authority,path,query,fragment', false), '/');
3310              } else {
3311                  if (isset($this->urlrefs[$url])) {
3312                      $url = urldecode($this->urlrefs[$url]);
3313                  }
3314  
3315                  $text = $url;
3316              }
3317          }
3318  
3319          $text = trim($text);
3320          $title = $this->encodeHTML($title);
3321  
3322          if (!$this->noimage) {
3323              $text = $this->images($text);
3324          }
3325  
3326          $text = $this->spans($text);
3327          $text = $this->glyphs($text);
          // From here on $url holds the shelved token for the rebuilt URI, not the URI itself.
3328          $url = $this->shelveURL($this->rebuildURI($uri_parts));
3329          $a = $this->newTag(
3330              'a',
3331              $this->parseAttribsToArray($atts),
3332              false
3333          )->title($title)->href($url, true)->rel($this->rel);
3334          $tags = $this->storeTags((string) $a, '</a>');
3335          $out = $this->shelve($tags['open'].trim($text).$tags['close']);
3336  
3337          return $pre . $out . $pop . $tight;
3338      }
3339  
3340       /**
3341        * Finds URI aliases within the given input.
3342        *
3343        * This method finds URI aliases in the Textile input. Links are stored
3344        * in an internal cache, so that they can be referenced from any link
3345        * in the document.
3346        *
3347        * This operation happens before the actual link parsing takes place.
3348        *
3349        * @param  string $text Textile input
3350        * @return string The Textile document with any URI aliases removed
3351        */
3352  
3353      protected function getRefs($text)
3354      {
3355          // One quoted "scheme:" alternative per permitted URL scheme.
3356          $schemes = array();
3357          foreach ($this->url_schemes as $scheme) {
3358              $schemes[] = preg_quote($scheme.':', '/');
3359          }
3360  
3361          // Alias lines look like "[alias]url", the URL starting with a listed scheme or "/".
3362          $regex = '/^\[(?P<alias>.+)\]'
3363              .'(?P<url>(?:'.join('|', $schemes).'|\/)\S+)'
3364              .'(?='.$this->regex_snippets['space'].'|$)/Um'.$this->regex_snippets['mod'];
3365  
3366          return preg_replace_callback($regex, array(&$this, "refs"), $text);
3367      }
3368  
3369      /**
3370       * Parses, encodes and shelves the current URI alias.
3371       *
3372       * @param  array $m Options
3373       * @return string Empty string
3374       * @see    Parser::getRefs()
3375       */
3376  
3377      protected function refs($m)
3378      {
3379          // Remember the rebuilt URL under its alias; the alias line itself is removed.
3380          $parsed = array();
3381          $this->parseURI($m['url'], $parsed);
3382          $this->urlrefs[$m['alias']] = ltrim($this->rebuildURI($parsed));
3383          return '';
3384      }
3385  
3386      /**
3387       * Shelves parsed URLs.
3388       *
3389       * Stores away a URL that has been parsed
3390       * and requires no more processing.
3391       *
3392       * @param  string $text The URL
3393       * @return string The fragment's unique reference ID
3394       * @see    Parser::retrieveURLs()
3395       */
3396  
3397      protected function shelveURL($text)
3398      {
3399          if ('' === $text) {
3400              return '';
3401          }
3402          $slot = $this->refIndex++; // claim the next free cache slot
3403          $this->refCache[$slot] = $text;
3404          return $this->uid.$slot.':url';
3405      }
3406  
3407      /**
3408       * Replaces reference tokens with corresponding shelved URL.
3409       *
3410       * This method puts all shelved URLs back to the final,
3411       * parsed input.
3412       *
3413       * @param  string $text The input
3414       * @return string Processed text
3415       * @see    Parser::shelveURL()
3416       */
3417  
3418      protected function retrieveURLs($text)
3419      {
3420          return preg_replace_callback("/{$this->uid}(?P<token>[0-9]+):url/", array(&$this, 'retrieveURL'), $text);
3421      }
3422  
3423      /**
3424       * Retrieves a URL from the shelf.
3425       *
3426       * @param  array  $m Options
3427       * @return string The URL
3428       */
3429  
3430      protected function retrieveURL($m)
3431      {
3432          $token = $m['token'];
3433          // Tokens with nothing on the shelf are replaced by an empty string.
3434          if (!isset($this->refCache[$token])) {
3435              return '';
3436          }
3437  
3438          // The shelved value may be an alias registered in $this->urlrefs.
3439          $stored = $this->refCache[$token];
3440          $url    = isset($this->urlrefs[$stored]) ? $this->urlrefs[$stored] : $stored;
3441          return $this->rEncodeHTML($this->relURL($url));
3442      }
3443  
3444      /**
3445       * Completes and formats a URL.
3446       *
3447       * @param  string $url The URL
3448       * @return string
3449       */
3450  
3451      protected function relURL($url)
3452      {
3453          $parts = @parse_url(urldecode($url));
3454  
3455          // Only scheme-less or http URLs without a host are candidates.
3456          $schemeOk = empty($parts['scheme']) || $parts['scheme'] == 'http';
3457          $hostless = empty($parts['host']);
3458          $wordPath = isset($parts['path']) && preg_match('/^\w/', $parts['path']);
3459  
3460          // Paths beginning with a word character get the configured prefix.
3461          return ($schemeOk && $hostless && $wordPath) ? $this->relativeImagePrefix.$url : $url;
3462      }
3463  
3464      /**
3465       * Checks if an URL is relative.
3466       *
3467       * The given URL is considered relative if it doesn't
3468       * contain scheme and hostname.
3469       *
3470       * @param  string $url The URL
3471       * @return bool   TRUE if relative, FALSE otherwise
3472       */
3473  
3474      protected function isRelURL($url)
3475      {
3476          $parts = @parse_url($url);
3477          return empty($parts['scheme']) ? empty($parts['host']) : false;
3478      }
3479  
3480      /**
3481       * Parses and shelves images in the given input.
3482       *
3483       * This method parses the input Textile document for images and
3484       * generates img HTML tags for each one found, caching the
3485       * generated img tag internally and replacing the Textile image with a
3486       * token to the cached tag.
3487       *
3488       * @param  string $text Textile input
3489       * @return string The input document with images pulled out and replaced with tokens
3490       */
3491  
3492      protected function images($text)
3493      {
3494          return preg_replace_callback(
3495              '/
3496              (?:[[{])?                       # pre
3497              \!                              # opening !
3498              (?P<align>\<|\=|\>|&lt;|&gt;)?  # optional alignment              $algn
3499              (?P<atts>'.$this->cls.')        # optional style,class atts       $atts
3500              (?:\.\s)?                       # optional dot-space
3501              (?P<url>[^\s(!]+)               # presume this is the src         $url
3502              \s?                             # optional space
3503              (?:\((?P<title>[^\)]+)\))?      # optional title                  $title
3504              \!                              # closing
3505              (?::(?P<href>\S+)(?<![\]).,]))? # optional href sans final punct. $href
3506              (?:[\]}]|(?=[.,\s)|]|$))        # lookahead: space,.)| or end of string ("|" needed if image in table cell)
3507              /x'.$this->regex_snippets['mod'],
3508              array(&$this, "fImage"),
3509              $text
3510          );
3511      }
3512  
3513      /**
3514       * Formats an image and stores it on the shelf.
3515       *
3516       * @param  array  $m Options
3517       * @return string Reference token for the shelved content
3518       * @see    Parser::images()
3519       */
3520  
3521      protected function fImage($m)
3522      {
3523          $extras = '';
3524  
3525          $align = (isset($m['align'])) ? $m['align'] : '';
3526          $atts  = $m['atts'];
3527          $url   = $m['url'];
3528          $title = (isset($m['title'])) ? $m['title'] : '';
3529          $href  = (isset($m['href'])) ? $m['href'] : '';
3530  
3531          $alignments = array(
3532              '<'    => 'left',
3533              '='    => 'center',
3534              '>'    => 'right',
3535              '&lt;' => 'left',
3536              '&gt;' => 'right',
3537          );
3538  
3539          if (isset($alignments[$align])) {
3540              if ('html5' === $this->doctype) {
3541                  $extras = 'align-'.$alignments[$align];
3542                  $align = '';
3543              } else {
3544                  $align = $alignments[$align];
3545              }
3546          } else {
3547              $align = '';
3548          }
3549  
3550          if ($title) {
3551              $title = $this->encodeHTML($title);
3552          }
3553  
3554          $img = $this->newTag('img', $this->parseAttribsToArray($atts, '', 1, $extras))
3555              ->align($align)
3556              ->alt($title, true)
3557              ->src($this->shelveURL($url), true)
3558              ->title($title);
3559  
3560          if (!$this->dimensionless_images && $this->isRelUrl($url)) {
3561              $real_location = realpath($this->doc_root.ltrim($url, $this->ds));
3562  
3563              if ($real_location) {
3564                  if ($size = getimagesize($real_location)) {
3565                      $img->height($size[1])->width($size[0]);
3566                  }
3567              }
3568          }
3569  
3570          $out = (string) $img;
3571  
3572          if ($href) {
3573              $href = $this->shelveURL($href);
3574              $link = $this->newTag('a', array(), false)->href($href)->rel($this->rel);
3575              $out = (string) $link . "$img</a>";
3576          }
3577  
3578          return $this->shelve($out);
3579      }
3580  
3581      /**
3582       * Parses code blocks in the given input.
3583       *
3584       * @param  string $text The input
3585       * @return string Processed text
3586       */
3587  
3588      protected function code($text)
3589      {
3590          $text = $this->doSpecial($text, '<code>', '</code>', 'fCode');
3591          $text = $this->doSpecial($text, '@', '@', 'fCode');
3592          $text = $this->doSpecial($text, '<pre>', '</pre>', 'fPre');
3593          return $text;
3594      }
3595  
3596      /**
3597       * Formats inline code tags.
3598       *
3599       * @param  array  $m
3600       * @return string
3601       */
3602  
3603      protected function fCode($m)
3604      {
3605          return $m['before'].$this->shelve('<code>'.$this->rEncodeHTML($m['content']).'</code>');
3606      }
3607  
3608      /**
3609       * Formats pre tags.
3610       *
3611       * @param  array  $m Options
3612       * @return string
3613       */
3614  
3615      protected function fPre($m)
3616      {
3617          return $m['before'].'<pre>'.$this->shelve($this->rEncodeHTML($m['content'])).'</pre>';
3618      }
3619  
3620      /**
3621       * Shelves parsed content.
3622       *
3623       * Stores away a fragment of the source text that have been parsed
3624       * and requires no more processing.
3625       *
3626       * @param  string $val The content
3627       * @return string The fragment's unique reference ID
3628       * @see    Parser::retrieve()
3629       */
3630  
3631      protected function shelve($val)
3632      {
3633          $i = $this->uid.($this->refIndex++).':shelve';
3634          $this->shelf[$i] = $val;
3635          return $i;
3636      }
3637  
3638      /**
3639       * Replaces reference tokens with corresponding shelved content.
3640       *
3641       * This method puts all shelved content back to the final,
3642       * parsed input.
3643       *
3644       * @param  string $text The input
3645       * @return string Processed text
3646       * @see    Parser::shelve()
3647       */
3648  
3649      protected function retrieve($text)
3650      {
3651          if ($this->shelf) {
3652              do {
3653                  $old = $text;
3654                  $text = str_replace(array_keys($this->shelf), $this->shelf, $text);
3655              } while ($text != $old);
3656          }
3657  
3658          return $text;
3659      }
3660  
3661      /**
3662       * Removes BOM and unifies line ending in the given input.
3663       *
3664       * @param  string $text Input Textile
3665       * @return string Cleaned version of the input
3666       */
3667  
3668      protected function cleanWhiteSpace($text)
3669      {
3670          // Removes byte order mark.
3671          $out = preg_replace("/^\xEF\xBB\xBF|\x1A/", '', $text);
3672          // Replaces CRLF and CR with single LF.
3673          $out = preg_replace("/\r\n?/", "\n", $out);
3674          // Removes leading tabs and spaces, if the line is otherwise empty.
3675          $out = preg_replace("/^[ \t]*\n/m", "\n", $out);
3676          // Removes leading and ending blank lines.
3677          $out = trim($out, "\n");
3678          return $out;
3679      }
3680  
3681      /**
3682       * Removes any unique tokens from the input.
3683       *
3684       * @param  string $text The input to clean
3685       * @return string Cleaned input
3686       * @since  3.5.5
3687       */
3688  
3689      protected function cleanUniqueTokens($text)
3690      {
3691          return str_replace($this->uid, '', $text);
3692      }
3693  
3694      /**
3695       * Uses the specified callback method to format the content between end and start nodes.
3696       *
3697       * @param  string $text   The input to format
3698       * @param  string $start  The start node to look for
3699       * @param  string $end    The end node to look for
3700       * @param  string $method The callback method
3701       * @return string Processed input
3702       */
3703  
3704      protected function doSpecial($text, $start, $end, $method)
3705      {
3706          return preg_replace_callback(
3707              '/(?P<before>^|\s|[|[({>])'.preg_quote($start, '/').'(?P<content>.*?)'.preg_quote($end, '/').'/ms',
3708              array(&$this, $method),
3709              $text
3710          );
3711      }
3712  
3713      /**
3714       * Parses notextile tags in the given input.
3715       *
3716       * @param  string $text The input
3717       * @return string Processed input
3718       */
3719  
3720      protected function noTextile($text)
3721      {
3722           $text = $this->doSpecial($text, '<notextile>', '</notextile>', 'fTextile');
3723           return $this->doSpecial($text, '==', '==', 'fTextile');
3724      }
3725  
3726      /**
3727       * Format notextile blocks.
3728       *
3729       * @param  array $m Options
3730       * @return string
3731       */
3732  
3733      protected function fTextile($m)
3734      {
3735          return $m['before'].$this->shelve($m['content']);
3736      }
3737  
3738      /**
3739       * Parses footnote reference links in the given input.
3740       *
3741       * This method replaces [n] instances with links.
3742       *
3743       * @param  string $text The input
3744       * @return string $text Processed input
3745       * @see    Parser::footnoteID()
3746       */
3747  
3748      protected function footnoteRefs($text)
3749      {
3750          return preg_replace_callback(
3751              '/(?<=\S)\[(?P<id>'.$this->regex_snippets['digit'].'+)'.
3752              '(?P<nolink>!?)\]'.$this->regex_snippets['space'].'?/U'.$this->regex_snippets['mod'],
3753              array(&$this, 'footnoteID'),
3754              $text
3755          );
3756      }
3757  
3758      /**
3759       * Renders a footnote reference link or ID.
3760       *
3761       * @param  array  $m Options
3762       * @return string Footnote link, or ID
3763       */
3764  
3765      protected function footnoteID($m)
3766      {
3767          $backref = ' class="footnote"';
3768  
3769          if (empty($this->fn[$m['id']])) {
3770              $this->fn[$m['id']] = $id = $this->linkPrefix . ($this->linkIndex++);
3771              $backref .= " id=\"fnrev$id\"";
3772          }
3773  
3774          $fnid = $this->fn[$m['id']];
3775          $footref = ('!' == $m['nolink']) ? $m['id'] : '<a href="#fn'.$fnid.'">'.$m['id'].'</a>';
3776          $footref = $this->formatFootnote($footref, $backref, false);
3777  
3778          return $footref;
3779      }
3780  
3781      /**
3782       * Parses and shelves quoted quotes in the given input.
3783       *
3784       * @param  string $text The text to search for quoted quotes
3785       * @return string
3786       */
3787  
3788      protected function glyphQuotedQuote($text, $find = '"?|"[^"]+"')
3789      {
3790          return preg_replace_callback(
3791              "/ (?P<pre>{$this->quote_starts})(?P<quoted>$find)(?P<post>.) /".$this->regex_snippets['mod'],
3792              array(&$this, "fGlyphQuotedQuote"),
3793              $text
3794          );
3795      }
3796  
3797      /**
3798       * Formats quoted quotes and stores it on the shelf.
3799       *
3800       * @param  array  $m Named regular expression parts
3801       * @return string Input with quoted quotes removed and replaced with tokens
3802       * @see    Parser::glyphQuotedQuote()
3803       */
3804  
3805      protected function fGlyphQuotedQuote($m)
3806      {
3807          // Check the correct closing character was found.
3808          if (!isset($this->quotes[$m['pre']]) || $m['post'] !== $this->quotes[$m['pre']]) {
3809              return $m[0];
3810          }
3811  
3812          $pre = strtr($m['pre'], array(
3813              '"' => '&#8220;',
3814              "'" => '&#8216;',
3815              ' ' => '&nbsp;',
3816          ));
3817  
3818          $post = strtr($m['post'], array(
3819              '"' => '&#8221;',
3820              "'" => '&#8217;',
3821              ' ' => '&nbsp;',
3822          ));
3823  
3824          $found = $m['quoted'];
3825          if (strlen($found) > 1) {
3826              $found = rtrim($this->glyphs($m['quoted']));
3827          } elseif ('"' === $found) {
3828              $found = "&quot;";
3829          }
3830  
3831          $glyph = ' '.$pre.$found.$post.' ';
3832          return $this->shelve($glyph);
3833      }
3834  
3835      /**
3836       * Replaces glyphs in the given input.
3837       *
3838       * This method performs typographical glyph replacements. The input is split
3839       * across HTML-like tags in order to avoid attempting glyph
3840       * replacements within tags.
3841       *
3842       * @param  string $text Input Textile
3843       * @return string
3844       */
3845  
3846      protected function glyphs($text)
3847      {
3848          // Fix: hackish -- adds a space if final char of text is a double quote.
3849          $text = preg_replace('/"\z/', "\" ", $text);
3850  
3851          $text = preg_split("@(<[\w/!?].*>)@Us".$this->regex_snippets['mod'], $text, -1, PREG_SPLIT_DELIM_CAPTURE);
3852          $i = 0;
3853          foreach ($text as $line) {
3854              // Text tag text tag text ...
3855              if (++$i % 2) {
3856                  // Raw < > & chars are already entity encoded in restricted mode
3857                  if (!$this->restricted) {
3858                      $line = preg_replace('/&(?!#?[a-z0-9]+;)/i', '&amp;', $line);
3859                      $line = str_replace(array('<', '>'), array('&lt;', '&gt;'), $line);
3860                  }
3861                  $line = preg_replace($this->glyph_search, $this->glyph_replace, $line);
3862              }
3863              $glyph_out[] = $line;
3864          }
3865          return join('', $glyph_out);
3866      }
3867  
3868      /**
3869       * Replaces glyph references in the given input.
3870       *
3871       * This method removes temporary glyph: instances
3872       * from the input.
3873       *
3874       * @param  string $text The input
3875       * @return string Processed input
3876       */
3877  
3878      protected function replaceGlyphs($text)
3879      {
3880          return str_replace($this->uid.':glyph:', '', $text);
3881      }
3882  
3883      /**
3884       * Translates alignment tag into corresponding CSS text-align property value.
3885       *
3886       * @param  string $in The Textile alignment tag
3887       * @return string CSS text-align value
3888       */
3889  
3890      protected function hAlign($in)
3891      {
3892          $vals = array(
3893              '&lt;'     => 'left',
3894              '&gt;'     => 'right',
3895              '&lt;&gt;' => 'justify',
3896              '<'        => 'left',
3897              '='        => 'center',
3898              '>'        => 'right',
3899              '<>'       => 'justify',
3900          );
3901  
3902          return (isset($vals[$in])) ? $vals[$in] : '';
3903      }
3904  
3905      /**
3906       * Translates vertical alignment tag into corresponding CSS vertical-align property value.
3907       *
3908       * @param  string $in The Textile alignment tag
3909       * @return string CSS vertical-align value
3910       */
3911  
3912      protected function vAlign($in)
3913      {
3914          $vals = array(
3915              '^' => 'top',
3916              '-' => 'middle',
3917              '~' => 'bottom',
3918          );
3919  
3920          return (isset($vals[$in])) ? $vals[$in] : '';
3921      }
3922  
3923      /**
3924       * Converts character codes in the given input from HTML numeric character reference to character code.
3925       *
3926       * Conversion is done according to Textile's multi-byte conversion map.
3927       *
3928       * @param  string $text    The input
3929       * @param  string $charset The character set
3930       * @return string Processed input
3931       */
3932  
3933      protected function encodeHigh($text, $charset = 'UTF-8')
3934      {
3935          if ($this->isMultiByteStringSupported()) {
3936              return mb_encode_numericentity($text, $this->cmap, $charset);
3937          }
3938  
3939          return htmlentities($text, ENT_NOQUOTES, $charset);
3940      }
3941  
3942      /**
3943       * Converts numeric HTML character references to character code.
3944       *
3945       * @param  string $text    The input
3946       * @param  string $charset The character set
3947       * @return string Processed input
3948       */
3949  
3950      protected function decodeHigh($text, $charset = 'UTF-8')
3951      {
3952          $text = (string) intval($text) === (string) $text ? "&#$text;" : "&$text;";
3953  
3954          if ($this->isMultiByteStringSupported()) {
3955              return mb_decode_numericentity($text, $this->cmap, $charset);
3956          }
3957  
3958          return html_entity_decode($text, ENT_NOQUOTES, $charset);
3959      }
3960  
3961      /**
3962       * Convert special characters to HTML entities.
3963       *
3964       * This method's functionality is identical to PHP's own
3965       * htmlspecialchars(). In Textile this is used for sanitising
3966       * the input.
3967       *
3968       * @param  string $str    The string to encode
3969       * @param  bool   $quotes Encode quotes
3970       * @return string Encoded string
3971       * @see    htmlspecialchars()
3972       */
3973  
3974      protected function encodeHTML($str, $quotes = true)
3975      {
3976          $a = array(
3977              '&' => '&amp;',
3978              '<' => '&lt;',
3979              '>' => '&gt;',
3980          );
3981  
3982          if ($quotes) {
3983              $a = $a + array(
3984                  "'" => '&#39;', // Numeric, as in htmlspecialchars
3985                  '"' => '&quot;',
3986              );
3987          }
3988  
3989          return str_replace(array_keys($a), $a, $str);
3990      }
3991  
3992      /**
3993       * Convert special characters to HTML entities.
3994       *
3995       * This is identical to encodeHTML(), but takes restricted
3996       * mode into account. When in restricted mode, only escapes
3997       * quotes.
3998       *
3999       * @param  string $str    The string to encode
4000       * @param  bool   $quotes Encode quotes
4001       * @return string Encoded string
4002       * @see    Parser::encodeHTML()
4003       */
4004  
4005      protected function rEncodeHTML($str, $quotes = true)
4006      {
4007          // In restricted mode, all input but quotes has already been escaped
4008          if ($this->restricted) {
4009              return str_replace('"', '&quot;', $str);
4010          }
4011  
4012          return $this->encodeHTML($str, $quotes);
4013      }
4014  
4015      /**
4016       * Whether multiple mbstring extensions is loaded.
4017       *
4018       * @return bool
4019       * @since  3.5.5
4020       */
4021  
4022      protected function isMultiByteStringSupported()
4023      {
4024          if ($this->mb === null) {
4025              $this->mb = is_callable('mb_strlen');
4026          }
4027  
4028          return $this->mb;
4029      }
4030  
4031      /**
4032       * Whether PCRE supports UTF-8.
4033       *
4034       * @return bool
4035       * @since  3.5.5
4036       */
4037  
4038      protected function isUnicodePcreSupported()
4039      {
4040          return (bool) @preg_match('/\pL/u', 'a');
4041      }
4042  }