PHP Cross Reference - Textpattern - Source: /textpattern/vendors/Netcarver/Textile/Parser.php

Source: /textpattern/vendors/Netcarver/Textile/Parser.php - 5308 lines - 156049 bytes - Summary - Text - Print

Description: Textile - A Humane Web Text Generator.
   1  <?php
   2  
   3  /**
   4   * Textile - A Humane Web Text Generator.
   5   *
   6   * @link https://github.com/textile/php-textile
   7   */
   8  
   9  /*
  10   * Textile - A Humane Web Text Generator
  11   *
  12   * Copyright (c) 2003-2004, Dean Allen
  13   * All rights reserved.
  14   *
  15   * Thanks to Carlo Zottmann <carlo@g-blog.net> for refactoring
  16   * Textile's procedural code into a class framework
  17   *
  18   * Additions and fixes Copyright (c) 2006    Alex Shiels       https://twitter.com/tellyworth
  19   * Additions and fixes Copyright (c) 2010    Stef Dawson       http://stefdawson.com/
  20   * Additions and fixes Copyright (c) 2010-17 Netcarver         https://github.com/netcarver
  21   * Additions and fixes Copyright (c) 2011    Jeff Soo          http://ipsedixit.net/
  22   * Additions and fixes Copyright (c) 2012    Robert Wetzlmayr  http://wetzlmayr.com/
  23   * Additions and fixes Copyright (c) 2012-19 Jukka Svahn       http://rahforum.biz/
  24   *
  25   * Redistribution and use in source and binary forms, with or without
  26   * modification, are permitted provided that the following conditions are met:
  27   *
  28   * * Redistributions of source code must retain the above copyright notice,
  29   * this list of conditions and the following disclaimer.
  30   *
  31   * * Redistributions in binary form must reproduce the above copyright notice,
  32   * this list of conditions and the following disclaimer in the documentation
  33   * and/or other materials provided with the distribution.
  34   *
  35   * * Neither the name Textile nor the names of its contributors may be used to
  36   * endorse or promote products derived from this software without specific
  37   * prior written permission.
  38   *
  39   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  40   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  41   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  42   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  43   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  44   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  45   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  46   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  47   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  48   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  49   * POSSIBILITY OF SUCH DAMAGE.
  50   */
  51  
  52  /*
  53  Textile usage examples.
  54  
  55  Block modifier syntax:
  56  
  57      Header: h(1-6).
  58      Paragraphs beginning with 'hn. ' (where n is 1-6) are wrapped in header tags.
  59      Example: h1. Header... -> <h1>Header...</h1>
  60  
  61      Paragraph: p. (also applied by default)
  62      Example: p. Text -> <p>Text</p>
  63  
  64      Blockquote: bq.
  65      Example: bq. Block quotation... -> <blockquote>Block quotation...</blockquote>
  66  
  67      Blockquote with citation: bq.:http://citation.url
  68      Example: bq.:http://example.com/ Text...
  69      ->    <blockquote cite="http://example.com/">Text...</blockquote>
  70  
  71      Footnote: fn(1-100).
  72      Example: fn1. Footnote... -> <p id="fn1">Footnote...</p>
  73  
  74      Numeric list: #, ##
  75      Consecutive paragraphs beginning with # are wrapped in ordered list tags.
  76      Example: <ol><li>ordered list</li></ol>
  77  
  78      Bulleted list: *, **
  79      Consecutive paragraphs beginning with * are wrapped in unordered list tags.
  80      Example: <ul><li>unordered list</li></ul>
  81  
  82      Definition list:
  83          Terms ;, ;;
  84          Definitions :, ::
  85      Consecutive paragraphs beginning with ; or : are wrapped in definition list tags.
  86      Example: <dl><dt>term</dt><dd>definition</dd></dl>
  87  
  88      Redcloth-style Definition list:
  89          - Term1 := Definition1
  90          - Term2 := Extended
  91            definition =:
  92  
  93  Phrase modifier syntax:
  94  
  95             _emphasis_    ->     <em>emphasis</em>
  96             __italic__    ->     <i>italic</i>
  97               *strong*    ->     <strong>strong</strong>
  98               **bold**    ->     <b>bold</b>
  99           ??citation??    ->     <cite>citation</cite>
 100         -deleted text-    ->     <del>deleted</del>
 101        +inserted text+    ->     <ins>inserted</ins>
 102          ^superscript^    ->     <sup>superscript</sup>
 103            ~subscript~    ->     <sub>subscript</sub>
 104                 @code@    ->     <code>computer code</code>
 105            %(bob)span%    ->     <span class="bob">span</span>
 106  
 107          ==notextile==    ->     leave text alone (do not format)
 108  
 109         "linktext":url    ->     <a href="url">linktext</a>
 110  "linktext(title)":url    ->     <a href="url" title="title">linktext</a>
 111                "$":url    ->     <a href="url">url</a>
 112         "$(title)":url    ->     <a href="url" title="title">url</a>
 113  
 114             !imageurl!    ->     <img src="imageurl" />
 115   !imageurl(alt text)!    ->     <img src="imageurl" alt="alt text" />
 116     !imageurl!:linkurl    ->     <a href="linkurl"><img src="imageurl" /></a>
 117  
 118  ABC(Always Be Closing)   ->     <acronym title="Always Be Closing">ABC</acronym>
 119  
 120  Linked Notes:
 121  
 122      Allows the generation of an automated list of notes with links.
 123  
 124      Linked notes are composed of three parts, a set of named _definitions_, a set of
 125      _references_ to those definitions and one or more _placeholders_ indicating where
 126      the consolidated list of notes is to be placed in your document.
 127  
 128      Definitions:
 129  
 130      Each note definition must occur in its own paragraph and should look like this...
 131  
 132      note#mynotelabel. Your definition text here.
 133  
 134      You are free to use whatever label you wish after the # as long as it is made up
 135      of letters, numbers, colon(:) or dash(-).
 136  
 137      References:
 138  
 139      Each note reference is marked in your text like this[#mynotelabel] and
 140      it will be replaced with a superscript reference that links into the list of
 141      note definitions.
 142  
 143      List placeholder(s):
 144  
 145      The note list can go anywhere in your document. You have to indicate where
 146      like this:
 147  
 148      notelist.
 149  
 150      notelist can take attributes (class#id) like this: notelist(class#id).
 151  
 152      By default, the note list will show each definition in the order that they
 153      are referenced in the text by the _references_. It will show each definition with
 154      a full list of backlinks to each reference. If you do not want this, you can choose
 155      to override the backlinks like this...
 156  
 157      notelist(class#id)!.    Produces a list with no backlinks.
 158      notelist(class#id)^.    Produces a list with only the first backlink.
 159  
 160      Should you wish to have a specific definition display backlinks differently to this
 161      then you can override the backlink method by appending a link override to the
 162      _definition_ you wish to customise.
 163  
 164      note#label.    Uses the notelist's setting for backlinks.
 165      note#label!.   Causes that definition to have no backlinks.
 166      note#label^.   Causes that definition to have one backlink (to the first ref.)
 167      note#label*.   Causes that definition to have all backlinks.
 168  
 169      Any unreferenced notes will be left out of the list unless you explicitly state
 170      you want them by adding a '+'. Like this...
 171  
 172      notelist(class#id)!+. Giving a list of all notes without any backlinks.
 173  
 174      You can mix and match the list backlink control and unreferenced links controls
 175      but the backlink control (if any) must go first. Like so: notelist^+. , not
 176      like this: notelist+^.
 177  
 178      Example...
 179          Scientists say[#lavader] the moon is small.
 180  
 181          note#other. An unreferenced note.
 182  
 183          note#lavader(myliclass). "Proof":http://example.com of a small moon.
 184  
 185          notelist(myclass#myid)+.
 186  
 187          Would output (the actual IDs used would be randomised)...
 188  
 189          <p>Scientists say<sup><a href="#note1" id="noteref1">1</a></sup> the moon is small.</p>
 190  
 191          <ol class="myclass" id="myid">
 192              <li class="myliclass"><a href="#noteref1"><sup>a</sup></a>
 193                  <span id="note1"> </span><a href="http://example.com">Proof</a> of a small moon.</li>
 194              <li>An unreferenced note.</li>
 195          </ol>
 196  
 197          The 'a b c' backlink characters can be altered too.
 198          For example if you wanted the notes to have numeric backlinks starting from 1:
 199  
 200          notelist:1.
 201  
 202  Table syntax:
 203  
 204      Simple tables:
 205  
 206          |a|simple|table|row|
 207          |And|Another|table|row|
 208          |With an||empty|cell|
 209  
 210          |=. My table caption goes here
 211          |_. A|_. table|_. header|_.row|
 212          |A|simple|table|row|
 213  
 214      Note: A table caption *must* be the first line of the table; otherwise it is treated as a center-aligned cell.
 215  
 216      Tables with attributes:
 217  
 218          table{border:1px solid black}. My table summary here
 219          {background:#ddd;color:red}. |{}| | | |
 220  
 221      To specify thead / tfoot / tbody groups, add one of these on its own line
 222      above the row(s) you wish to wrap (you may specify attributes before the dot):
 223  
 224          |^.     # thead
 225          |-.     # tbody
 226          |~.     # tfoot
 227  
 228      Column groups:
 229  
 230          |:\3. 100|
 231  
 232          Becomes:
 233              <colgroup span="3" width="100"></colgroup>
 234  
 235          You can omit either or both of the \N or width values. You may also
 236          add cells after the colgroup definition to specify col elements with
 237          span, width, or standard Textile attributes:
 238  
 239          |:. 50|(firstcol). |\2. 250||300|
 240  
 241          Becomes:
 242              <colgroup width="50">
 243                  <col class="firstcol" />
 244                  <col span="2" width="250" />
 245                  <col />
 246                  <col width="300" />
 247              </colgroup>
 248  
 249          (Note that, per the HTML specification, you should not add span
 250          to the colgroup if specifying col elements.)
 251  
 252  Applying Attributes:
 253  
 254      Most anywhere Textile code is used, attributes such as arbitrary css style,
 255      css classes, and ids can be applied. The syntax is fairly consistent.
 256  
 257      The following characters quickly alter the alignment of block elements:
 258  
 259          <  ->  left align     ex. p<. left-aligned para
 260          >  ->  right align         h3>. right-aligned header 3
 261          =  ->  centred             h4=. centred header 4
 262          <> ->  justified         p<>. justified paragraph
 263  
 264      These will change vertical alignment in table cells:
 265  
 266          ^  ->  top           ex. |^. top-aligned table cell|
 267          -  ->  middle           |-. middle aligned|
 268          ~  ->  bottom           |~. bottom aligned cell|
 269  
 270      Plain (parentheses) inserted between block syntax and the closing dot-space
 271      indicate classes and ids:
 272  
 273          p(hector). paragraph -> <p class="hector">paragraph</p>
 274  
 275          p(#fluid). paragraph -> <p id="fluid">paragraph</p>
 276  
 277          (classes and ids can be combined)
 278          p(hector#fluid). paragraph -> <p class="hector" id="fluid">paragraph</p>
 279  
 280      Curly {brackets} insert arbitrary css style
 281  
 282          p{line-height:18px}. paragraph -> <p style="line-height:18px">paragraph</p>
 283  
 284          h3{color:red}. header 3 -> <h3 style="color:red">header 3</h3>
 285  
 286      Square [brackets] insert language attributes
 287  
 288          p[no]. paragraph -> <p lang="no">paragraph</p>
 289  
 290          %[fr]phrase% -> <span lang="fr">phrase</span>
 291  
 292      Usually Textile block element syntax requires a dot and space before the block
 293      begins, but since lists don't, they can be styled just using braces
 294  
 295          #{color:blue} one  ->  <ol style="color:blue">
 296          # big                    <li>one</li>
 297          # list                    <li>big</li>
 298                                  <li>list</li>
 299                                 </ol>
 300  
 301      Using the span tag to style a phrase
 302  
 303          It goes like this, %{color:red}the fourth the fifth%
 304                -> It goes like this, <span style="color:red">the fourth the fifth</span>
 305  
 306  Ordered list start and continuation:
 307  
 308      You can control the start attribute of an ordered list like so:
 309  
 310          #5 Item 5
 311          # Item 6
 312  
 313      You can resume numbering list items after some intervening anonymous block like so...
 314  
 315          #_ Item 7
 316          # Item 8
 317  */
 318  
 319  namespace Netcarver\Textile;
 320  
 321  /**
 322   * Textile parser.
 323   *
 324   * The Parser class takes Textile input and converts it to well formatted HTML.
 325   * This is the library's main class, hosting the parsing functionality and
 326   * exposing a simple public interface for you to use.
 327   *
 328   * The most basic use case would involve initialising an instance of the class
 329   * and calling the Parser::parse() method:
 330   *
 331   * bc. $parser = new \Netcarver\Textile\Parser();
 332   * echo $parser->parse('h1. Hello World!');
 333   *
 334   * The above generates:
 335   *
 336   * bc. <h1>Hello World!</h1>
 337   *
 338   * The functionality of the parser can be customized with the setters:
 339   *
 340   * bc. $parser = new \Netcarver\Textile\Parser();
 341   * $parser->setImages(false)->parse('!no-image.jpg!');
 342   *
 343   * The Parser class can also be extended to create pre-configured classes:
 344   *
 345   * bc.. namespace MyApp;
 346   *
 347   * use \Netcarver\Textile\Parser;
 348   *
 349   * class CommentParser extends Parser
 350   * {
 351   *     protected function configure()
 352   *     {
 353   *         $this->setImages(false)->setRestricted(true)->setLite(true);
 354   *     }
 355   * }
 356   *
 357   * p. Keep in mind that the classes' protected methods and properties should be
 358   * considered part of the private API and depending on them should be avoided.
 359   * Instead try to only use the public methods marked as being part of the
 360   * public API.
 361   *
 362   * @see Parser::__construct()
 363   * @see Parser::parse()
 364   */
 365  
 366  class Parser
 367  {
 368      /**
 369       * Version number.
 370       *
 371       * @var string
 372       */
 373  
 374      protected $ver = '3.7.6';
 375  
 376      /**
 377       * Regular expression snippets.
 378       *
 379       * @var array
 380       */
 381  
 382      protected $regex_snippets;
 383  
 384      /**
 385       * Pattern for horizontal align.
 386       *
 387       * @var string
 388       */
 389  
 390      protected $hlgn = "(?:\<(?!>)|&lt;&gt;|&gt;|&lt;|(?<!<)\>|\<\>|\=|[()]+(?! ))";
 391  
 392      /**
 393       * Pattern for vertical align.
 394       *
 395       * @var string
 396       */
 397  
 398      protected $vlgn = "[\-^~]";
 399  
 400      /**
 401       * Pattern for HTML classes and IDs.
 402       *
 403       * Does not allow classes/ids/languages/styles to span across
 404       * newlines if used in a dotall regular expression.
 405       *
 406       * @var string
 407       */
 408  
 409      protected $clas = "(?:\([^)\n]+\))";
 410  
 411      /**
 412       * Pattern for language attribute.
 413       *
 414       * @var string
 415       */
 416  
 417      protected $lnge = "(?:\[[^]\n]+\])";
 418  
 419      /**
 420       * Pattern for style attribute.
 421       *
 422       * @var string
 423       */
 424  
 425      protected $styl = "(?:\{[^}\n]+\})";
 426  
 427      /**
 428       * Regular expression pattern for column spans in tables.
 429       *
 430       * @var string
 431       */
 432  
 433      protected $cspn = "(?:\\\\[0-9]+)";
 434  
 435      /**
 436       * Regular expression for row spans in tables.
 437       *
 438       * @var string
 439       */
 440  
 441      protected $rspn = "(?:\/[0-9]+)";
 442  
 443      /**
 444       * Regular expression for horizontal or vertical alignment.
 445       *
 446       * @var string
 447       */
 448  
 449      protected $a;
 450  
 451      /**
 452       * Regular expression for column or row spans in tables.
 453       *
 454       * @var string
 455       */
 456  
 457      protected $s;
 458  
 459      /**
 460       * Pattern that matches class, style, language and horizontal alignment attributes.
 461       *
 462       * @var string
 463       */
 464  
 465      protected $c;
 466  
 467      /**
 468       * Pattern that matches class, style and language attributes.
 469       *
 470       * Allows all 16 possible permutations of class, style and language attributes.
 471       * No attribute, c, cl, cs, cls, csl, l, lc, ls, lcs, lsc, s, sc, sl, scl or slc.
 472       *
 473       * @var string
 474       */
 475  
 476      protected $cls;
 477  
 478      /**
 479       * Whitelisted block tags.
 480       *
 481       * @var array
 482       */
 483  
 484      protected $blocktag_whitelist = array();
 485  
 486      /**
 487       * Whether raw blocks are enabled.
 488       *
 489       * @var   bool
 490       * @since 3.7.0
 491       */
 492  
 493      protected $rawBlocksEnabled = false;
 494  
 495      /**
 496       * An array of patterns used for matching phrasing tags.
 497       *
 498       * Phrasing tags, unlike others, are wrapped in a paragraph even if they
 499       * already wrap the block.
 500       *
 501       * @var   array
 502       * @since 3.7.0
 503       */
 504  
 505      protected $phrasingContent = array(
 506          'a',
 507          'abbr',
 508          'acronym',
 509          'area',
 510          'audio',
 511          'b',
 512          'bdo',
 513          'br',
 514          'button',
 515          'canvas',
 516          'cite',
 517          'code',
 518          'command',
 519          'data',
 520          'datalist',
 521          'del',
 522          'dfn',
 523          'em',
 524          'embed',
 525          'i',
 526          'iframe',
 527          'img',
 528          'input',
 529          'ins',
 530          'kbd',
 531          'keygen',
 532          'label',
 533          'link',
 534          'map',
 535          'mark',
 536          'math',
 537          'meta',
 538          'meter',
 539          'noscript',
 540          'object',
 541          'output',
 542          'progress',
 543          'q',
 544          'ruby',
 545          'samp',
 546          'script',
 547          'select',
 548          'small',
 549          'span',
 550          'strong',
 551          'sub',
 552          'sup',
 553          'svg',
 554          'textarea',
 555          'time',
 556          'var',
 557          'video',
 558          'wbr',
 559      );
 560  
 561      /**
 562       * An array of patterns used to match divider tags.
 563       *
 564       * Blocks containing only self-closing divider tags are not wrapped in
 565       * paragraph tags.
 566       *
 567       * @var   array
 568       * @since 3.7.0
 569       */
 570  
 571      protected $dividerContent = array(
 572          'br',
 573          'hr',
 574          'img',
 575      );
 576  
 577      /**
 578       * An array of patterns used to match unwrappable block tags.
 579       *
 580       * Blocks containing any of these unwrappable tags will not be wrapped in
 581       * paragraphs.
 582       *
 583       * @var   array
 584       * @since 3.7.0
 585       */
 586  
 587      protected $blockContent = array(
 588          'address',
 589          'article',
 590          'aside',
 591          'blockquote',
 592          'details',
 593          'div',
 594          'dl',
 595          'fieldset',
 596          'figure',
 597          'footer',
 598          'form',
 599          'h1',
 600          'h2',
 601          'h3',
 602          'h4',
 603          'h5',
 604          'h6',
 605          'header',
 606          'hgroup',
 607          'main',
 608          'menu',
 609          'nav',
 610          'ol',
 611          'p',
 612          'pre',
 613          's',
 614          'section',
 615          'table',
 616          'template',
 617          'ul',
 618      );
 619  
 620      /**
 621       * An array of built patterns.
 622       *
 623       * @var   array
 624       * @since 3.7.0
 625       */
 626  
 627      protected $patterns;
 628  
 629      /**
 630       * Whether block tags are enabled.
 631       *
 632       * @var   bool
 633       * @since 3.6.0
 634       */
 635  
 636      protected $blockTagsEnabled = true;
 637  
 638      /**
 639       * Whether lines are wrapped.
 640       *
 641       * @var   bool
 642       * @since 3.6.0
 643       */
 644  
 645      protected $lineWrapEnabled = true;
 646  
 647      /**
 648       * Pattern for punctuation.
 649       *
 650       * @var string
 651       */
 652  
 653      protected $pnct = '[\!"#\$%&\'()\*\+,\-\./:;<=>\?@\[\\\]\^_`{\|}\~]';
 654  
 655      /**
 656       * Pattern for URL.
 657       *
 658       * @var string
 659       */
 660  
 661      protected $urlch;
 662  
 663      /**
 664       * Matched marker symbols.
 665       *
 666       * @var string
 667       */
 668  
 669      protected $syms = '¤§µ¶†‡•∗∴◊♠♣♥♦';
 670  
 671      /**
 672       * HTML rel attribute used for links.
 673       *
 674       * @var string
 675       */
 676  
 677      protected $rel = '';
 678  
 679      /**
 680       * Array of footnotes.
 681       *
 682       * @var array
 683       */
 684  
 685      protected $fn;
 686  
 687      /**
 688       * Shelved content.
 689       *
 690       * Stores fragments of the source text that have been parsed
 691       * and require no more processing.
 692       *
 693       * @var array
 694       */
 695  
 696      protected $shelf = array();
 697  
 698      /**
 699       * Restricted mode.
 700       *
 701       * @var bool
 702       */
 703  
 704      protected $restricted = false;
 705  
 706      /**
 707       * Disallow images.
 708       *
 709       * @var bool
 710       */
 711  
 712      protected $noimage = false;
 713  
 714      /**
 715       * Lite mode.
 716       *
 717       * @var bool
 718       */
 719  
 720      protected $lite = false;
 721  
 722      /**
 723       * Accepted link protocols.
 724       *
 725       * @var array
 726       */
 727  
 728      protected $url_schemes = array();
 729  
 730      /**
 731       * Restricted link protocols.
 732       *
 733       * @var array
 734       */
 735  
 736      protected $restricted_url_schemes = array(
 737          'http',
 738          'https',
 739          'ftp',
 740          'mailto',
 741      );
 742  
 743      /**
 744       * Unrestricted link protocols.
 745       *
 746       * @var array
 747       */
 748  
 749      protected $unrestricted_url_schemes = array(
 750          'http',
 751          'https',
 752          'ftp',
 753          'mailto',
 754          'file',
 755          'tel',
 756          'callto',
 757          'sftp',
 758      );
 759  
 760      /**
 761       * Span tags.
 762       *
 763       * @var array
 764       */
 765  
 766      protected $span_tags = array(
 767          '*'  => 'strong',
 768          '**' => 'b',
 769          '??' => 'cite',
 770          '_'  => 'em',
 771          '__' => 'i',
 772          '-'  => 'del',
 773          '%'  => 'span',
 774          '+'  => 'ins',
 775          '~'  => 'sub',
 776          '^'  => 'sup',
 777      );
 778  
 779      /**
 780       * Span wrappers.
 781       *
 782       * @var   array
 783       * @since 3.7.2
 784       */
 785  
 786      protected $spanWrappers = array(
 787          '[' => ']',
 788      );
 789  
 790      /**
 791       * Patterns for finding glyphs.
 792       *
 793       * An array of regex patterns used to find text features
 794       * such as apostrophes, fractions and em-dashes. Each
 795       * entry in this array must have a corresponding entry in
 796       * the $glyph_replace array.
 797       *
 798       * @var array
 799       * @see Parser::$glyph_replace
 800       */
 801  
 802      protected $glyph_search = array();
 803  
 804      /**
 805       * Glyph replacements.
 806       *
 807       * An array of replacements used to insert typographic glyphs
 808       * into the text. Each entry must have a corresponding entry in
 809       * the $glyph_search array and may refer to values captured in
 810       * the corresponding search regex.
 811       *
 812       * @var array
 813       * @see Parser::$glyph_search
 814       */
 815  
 816      protected $glyph_replace = array();
 817  
 818      /**
 819       * Indicates whether glyph substitution is required.
 820       *
 821       * Dirty flag, set by Parser::setSymbol(), indicating the parser needs to
 822       * rebuild the glyph substitutions before the next parse.
 823       *
 824       * @var bool
 825       * @see Parser::setSymbol()
 826       */
 827  
 828      protected $rebuild_glyphs = true;
 829  
 830      /**
 831       * Relative image path.
 832       *
 833       * @var string
 834       * @deprecated in 3.7.0
 835       * @see Parser::$relImagePrefix
 836       * @see Parser::$relLinkPrefix
 837       */
 838  
 839      protected $relativeImagePrefix;
 840  
 841      /**
 842       * Relative link prefix.
 843       *
 844       * @var   string
 845       * @since 3.7.0
 846       */
 847  
 848      protected $relLinkPrefix = '';
 849  
 850      /**
 851       * Prefix applied to relative images.
 852       *
 853       * @var   string
 854       * @since 3.7.0
 855       */
 856  
 857      protected $relImagePrefix = '';
 858  
 859      /**
 860       * Maximum nesting level for inline elements.
 861       *
 862       * @var int
 863       */
 864  
 865      protected $max_span_depth = 5;
 866  
 867      /**
 868       * Server document root.
 869       *
 870       * @var string
 871       */
 872  
 873      protected $doc_root;
 874  
 875      /**
 876       * Target document type.
 877       *
 878       * @var string
 879       */
 880  
 881      protected $doctype;
 882  
 883      /**
 884       * An array of supported doctypes.
 885       *
 886       * @var   array
 887       * @since 3.6.0
 888       */
 889  
 890      protected $doctypes = array(
 891          'xhtml',
 892          'html5',
 893      );
 894  
 895      /**
 896       * Substitution symbols.
 897       *
 898       * Basic symbols used in textile glyph replacements. To override these, call
 899       * setSymbol method before calling Parser::parse().
 900       *
 901       * @var array
 902       * @see Parser::setSymbol()
 903       * @see Parser::parse()
 904       */
 905  
 906      protected $symbols = array(
 907          'quote_single_open'  => '&#8216;',
 908          'quote_single_close' => '&#8217;',
 909          'quote_double_open'  => '&#8220;',
 910          'quote_double_close' => '&#8221;',
 911          'apostrophe'         => '&#8217;',
 912          'prime'              => '&#8242;',
 913          'prime_double'       => '&#8243;',
 914          'ellipsis'           => '&#8230;',
 915          'emdash'             => '&#8212;',
 916          'endash'             => '&#8211;',
 917          'dimension'          => '&#215;',
 918          'trademark'          => '&#8482;',
 919          'registered'         => '&#174;',
 920          'copyright'          => '&#169;',
 921          'half'               => '&#189;',
 922          'quarter'            => '&#188;',
 923          'threequarters'      => '&#190;',
 924          'degrees'            => '&#176;',
 925          'plusminus'          => '&#177;',
 926          'fn_ref_pattern'     => '<sup{atts}>{marker}</sup>',
 927          'fn_foot_pattern'    => '<sup{atts}>{marker}</sup>',
 928          'nl_ref_pattern'     => '<sup{atts}>{marker}</sup>',
 929          'caps'               => '<span class="caps">{content}</span>',
 930          'acronym'            => null,
 931      );
 932  
 933      /**
 934       * Dimensionless images flag.
 935       *
 936       * @var bool
 937       */
 938  
 939      protected $dimensionless_images = false;
 940  
 941      /**
 942       * Directory separator.
 943       *
 944       * @var string
 945       */
 946  
 947      protected $ds = '/';
 948  
 949      /**
 950       * Whether mbstring extension is installed.
 951       *
 952       * @var bool
 953       */
 954  
 955      protected $mb;
 956  
 957      /**
 958       * Multi-byte conversion map.
 959       *
 960       * @var array
 961       */
 962  
 963      protected $cmap = array(0x0080, 0xffff, 0, 0xffff);
 964  
    /**
     * Stores note index.
     *
     * Starts from 1.
     *
     * @var int
     */

    protected $note_index = 1;

    /**
     * Stores unreferenced notes.
     *
     * @var array
     */

    protected $unreferencedNotes = array();

    /**
     * Stores note lists.
     *
     * @var array
     */

    protected $notelist_cache = array();

    /**
     * Stores notes.
     *
     * @var array
     */

    protected $notes = array();

    /**
     * Stores URL references.
     *
     * @var array
     */

    protected $urlrefs = array();

    /**
     * Stores span depth.
     *
     * @var int
     */

    protected $span_depth = 0;

    /**
     * Unique ID used for reference tokens.
     *
     * Assigned by the constructor as 'textileRef:', followed by a value
     * generated with uniqid(), followed by a colon.
     *
     * @var string
     */

    protected $uid;

    /**
     * Token reference index.
     *
     * Starts from 1.
     *
     * @var int
     */

    protected $refIndex = 1;

    /**
     * Stores references values.
     *
     * @var array
     */

    protected $refCache = array();

    /**
     * Matched open and closed quotes.
     *
     * Each key is an opening character and its value is the matching closing
     * character. The keys are also used by the constructor to build
     * Parser::$quote_starts.
     *
     * @var array
     */

    protected $quotes = array(
        '"' => '"',
        "'" => "'",
        '(' => ')',
        '{' => '}',
        '[' => ']',
        '«' => '»',
        '»' => '«',
        '‹' => '›',
        '›' => '‹',
        '„' => '“',
        '‚' => '‘',
        '‘' => '’',
        '”' => '“',
    );

    /**
     * Regular expression that matches starting quotes.
     *
     * Built by the constructor from the keys of Parser::$quotes: each key is
     * escaped with preg_quote() and the results are joined with '|'.
     *
     * @var string
     */

    protected $quote_starts;

    /**
     * Ordered list starts.
     *
     * @var array
     */

    protected $olstarts = array();

    /**
     * Link prefix.
     *
     * Assigned by the constructor as the same uniqid() value that is used in
     * Parser::$uid, followed by a hyphen.
     *
     * @var string
     */

    protected $linkPrefix;

    /**
     * Link index.
     *
     * Starts from 1.
     *
     * @var int
     */

    protected $linkIndex = 1;
1090  
1091      /**
1092       * Constructor.
1093       *
1094       * The constructor allows setting options that affect the class instance as
1095       * a whole, such as the output doctype. To instruct the parser to return
1096       * HTML5 markup instead of XHTML, set $doctype argument to 'html5'.
1097       *
1098       * bc. $parser = new \Netcarver\Textile\Parser('html5');
1099       * echo $parser->parse('HTML(HyperText Markup Language)");
1100       *
1101       * @param  string $doctype The output document type, either 'xhtml' or 'html5'
1102       * @throws \InvalidArgumentException
1103       * @see    Parser::configure()
1104       * @see    Parser::parse()
1105       * @see    Parser::setDocumentType()
1106       * @api
1107       */
1108  
1109      public function __construct($doctype = 'xhtml')
1110      {
1111          $this->setDocumentType($doctype)->setRestricted(false);
1112          $uid = uniqid(rand());
1113          $this->uid = 'textileRef:'.$uid.':';
1114          $this->linkPrefix = $uid.'-';
1115          $this->a = "(?:$this->hlgn|$this->vlgn)*";
1116          $this->s = "(?:$this->cspn|$this->rspn)*";
1117          $this->c = "(?:$this->clas|$this->styl|$this->lnge|$this->hlgn)*";
1118  
1119          $this->cls = '(?:'.
1120              "$this->clas(?:".
1121                  "$this->lnge(?:$this->styl)?|$this->styl(?:$this->lnge)?".
1122                  ')?|'.
1123              "$this->lnge(?:".
1124                  "$this->clas(?:$this->styl)?|$this->styl(?:$this->clas)?".
1125                  ')?|'.
1126              "$this->styl(?:".
1127                  "$this->clas(?:$this->lnge)?|$this->lnge(?:$this->clas)?".
1128                  ')?'.
1129              ')?';
1130  
1131          if ($this->isUnicodePcreSupported()) {
1132              $this->regex_snippets = array(
1133                  'acr'   => '\p{Lu}\p{Nd}',
1134                  'abr'   => '\p{Lu}',
1135                  'nab'   => '\p{Ll}',
1136                  'wrd'   => '(?:\p{L}|\p{M}|\p{N}|\p{Pc})',
1137                  'mod'   => 'u', // Make sure to mark the unicode patterns as such, Some servers seem to need this.
1138                  'cur'   => '\p{Sc}',
1139                  'digit' => '\p{N}',
1140                  'space' => '(?:\p{Zs}|\h|\v)',
1141                  'char'  => '(?:[^\p{Zs}\h\v])',
1142              );
1143          } else {
1144              $this->regex_snippets = array(
1145                  'acr'   => 'A-Z0-9',
1146                  'abr'   => 'A-Z',
1147                  'nab'   => 'a-z',
1148                  'wrd'   => '\w',
1149                  'mod'   => '',
1150                  'cur'   => '',
1151                  'digit' => '\d',
1152                  'space' => '(?:\s|\h|\v)',
1153                  'char'  => '\S',
1154              );
1155          }
1156  
1157          $this->urlch = '['.$this->regex_snippets['wrd'].'"$\-_.+!*\'(),";\/?:@=&%#{}|\\^~\[\]`]';
1158          $this->quote_starts = implode('|', array_map('preg_quote', array_keys($this->quotes)));
1159  
1160          if (defined('DIRECTORY_SEPARATOR')) {
1161              $this->ds = DIRECTORY_SEPARATOR;
1162          }
1163  
1164          if (php_sapi_name() === 'cli') {
1165              if (($cwd = getcwd()) !== false) {
1166                  $this->setDocumentRootDirectory($cwd);
1167              }
1168          } elseif (!empty($_SERVER['DOCUMENT_ROOT'])) {
1169              $this->setDocumentRootDirectory($_SERVER['DOCUMENT_ROOT']);
1170          } elseif (!empty($_SERVER['PATH_TRANSLATED'])) {
1171              $this->setDocumentRootDirectory($_SERVER['PATH_TRANSLATED']);
1172          }
1173  
1174          $this->configure();
1175      }
1176  
    /**
     * Configure the current parser.
     *
     * This method is invoked as the last step of the constructor. The default
     * implementation does nothing; it can be extended to create a
     * pre-configured parser class.
     *
     * bc.. namespace MyApp;
     *
     * use Netcarver\Textile\Parser;
     *
     * class CommentParser extends Parser
     * {
     *     protected function configure()
     *     {
     *         $this->setImages(false)->setRestricted(true)->setLite(true);
     *     }
     * }
     *
     * @since  3.7.0
     * @return void Return value is ignored
     * @api
     */

    protected function configure()
    {
        // Intentionally empty: an extension point for subclasses.
    }
1202  
1203      /**
1204       * Sets the output document type.
1205       *
1206       * bc. $parser = new \Netcarver\Textile\Parser();
1207       * echo $parser
1208       *     ->setDocumentType('html5')
1209       *     ->parse('HTML(HyperText Markup Language)");
1210       *
1211       * @param  string $doctype Either 'xhtml' or 'html5'
1212       * @return Parser This instance
1213       * @since  3.6.0
1214       * @see    Parser::getDocumentType()
1215       * @api
1216       */
1217  
1218      public function setDocumentType($doctype)
1219      {
1220          if (in_array($doctype, $this->doctypes, true)) {
1221              if ($this->getDocumentType() !== $doctype) {
1222                  $this->doctype = $doctype;
1223                  $this->rebuild_glyphs = true;
1224              }
1225  
1226              return $this;
1227          }
1228  
1229          throw new \InvalidArgumentException('Invalid doctype given.');
1230      }
1231  
1232      /**
1233       * Gets the current output document type.
1234       *
1235       * bc. $parser = new \Netcarver\Textile\Parser();
1236       * echo $parser->getDocumentType();
1237       *
1238       * @return string The document type
1239       * @since  3.6.0
1240       * @see    Parser::setDocumentType()
1241       * @api
1242       */
1243  
1244      public function getDocumentType()
1245      {
1246          return $this->doctype;
1247      }
1248  
1249      /**
1250       * Sets the document root directory path.
1251       *
1252       * This method sets the path that is used to resolve relative file paths
1253       * within local filesystem. This is used to fetch image dimensions, for
1254       * instance.
1255       *
1256       * bc. $parser = new \Netcarver\Textile\Parser();
1257       * $parser->setDocumentRootDirectory('/path/to/document/root/dir');
1258       *
1259       * If not set, document root defaults to the current working directory if
1260       * PHP-Textile is used via CLI. On server environment, DOCUMENT_ROOT or
1261       * PATH_TRANSLATED server variable is used based on which ever is available.
1262       *
1263       * @param  string $path The root path
1264       * @return Parser This instance
1265       * @since  3.6.0
1266       * @see    Parser::getDocumentRootDirectory()
1267       * @api
1268       */
1269  
1270      public function setDocumentRootDirectory($path)
1271      {
1272          $this->doc_root = rtrim($path, '\\/').$this->ds;
1273          return $this;
1274      }
1275  
1276      /**
1277       * Gets the current document root directory path.
1278       *
1279       * bc. $parser = new \Netcarver\Textile\Parser();
1280       * echo $parser->getDocumentRootDirectory();
1281       *
1282       * @return string Path to the document root directory
1283       * @since  3.6.0
1284       * @see    Parser::setDocumentRootDirectory()
1285       * @api
1286       */
1287  
1288      public function getDocumentRootDirectory()
1289      {
1290          return $this->doc_root;
1291      }
1292  
1293      /**
1294       * Enables lite mode.
1295       *
1296       * If enabled, allowed tags are limited. Parser will prevent the use extra
1297       * Textile formatting, accepting only paragraphs and blockquotes as valid
1298       * block tags.
1299       *
1300       * bc. $parser = new \Netcarver\Textile\Parser();
1301       * $parser
1302       *     ->setLite(true)
1303       *     ->parse('h1. Headings are disabled too');
1304       *
1305       * Generates:
1306       *
1307       * bc. <p>h1. Headings are disabled too</p>
1308       *
1309       * This doesn't prevent unsafe input values. If you wish to parse untrusted
1310       * user-given Textile input, also enable the restricted parser mode with
1311       * Parser::setRestricted().
1312       *
1313       * bc. $parser = new \Netcarver\Textile\Parser();
1314       * echo $parser
1315       *     ->setRestricted(true)
1316       *     ->setLite(true)
1317       *     ->parse('h1. Hello World!');
1318       *
1319       * @param  bool   $lite TRUE to enable
1320       * @return Parser This instance
1321       * @since  3.6.0
1322       * @see    Parser::isLiteModeEnabled()
1323       * @see    Parser::setRestricted()
1324       * @api
1325       */
1326  
1327      public function setLite($lite)
1328      {
1329          $this->lite = (bool) $lite;
1330          return $this;
1331      }
1332  
1333      /**
1334       * Gets the lite mode status.
1335       *
1336       * bc. $parser = new \Netcarver\Textile\Parser();
1337       * if ($parser->isLiteModeEnabled() === true) {
1338       *     echo 'Lite mode is enabled.';
1339       * }
1340       *
1341       * @return bool TRUE if enabled, FALSE otherwise
1342       * @since  3.6.0
1343       * @see    Parser::setLite()
1344       * @api
1345       */
1346  
1347      public function isLiteModeEnabled()
1348      {
1349          return (bool) $this->lite;
1350      }
1351  
1352      /**
1353       * Disables and enables images.
1354       *
1355       * If disabled, image tags are not generated. This option is ideal for
1356       * minimalist output such as text-only comments.
1357       *
1358       * bc. $parser = new \Netcarver\Textile\Parser();
1359       * echo $parser
1360       *     ->setImages(true)
1361       *     ->parse('!image.png!');
1362       *
1363       * Generates:
1364       *
1365       * bc. <p>!image.png!</p>
1366       *
1367       * @param  bool   $enabled TRUE to enable, FALSE to disable
1368       * @return Parser This instance
1369       * @since  3.6.0
1370       * @see    Parser::isImageTagEnabled()
1371       * @api
1372       */
1373  
1374      public function setImages($enabled)
1375      {
1376          $this->noimage = !$enabled;
1377          return $this;
1378      }
1379  
1380      /**
1381       * Whether images are enabled.
1382       *
1383       * bc. $parser = new \Netcarver\Textile\Parser();
1384       * if ($parser->isImageTagEnabled() === true) {
1385       *     echo 'Images are enabled.';
1386       * }
1387       *
1388       * @return bool TRUE if enabled, FALSE otherwise
1389       * @since  3.6.0
1390       * @see    Parser::setImages()
1391       * @api
1392       */
1393  
1394      public function isImageTagEnabled()
1395      {
1396          return !$this->noimage;
1397      }
1398  
1399      /**
1400       * Sets link relationship status value.
1401       *
1402       * This method sets the HTML relationship tokens that are applied to links
1403       * generated by PHP-Textile.
1404       *
1405       * bc. $parser = new \Netcarver\Textile\Parser();
1406       * echo $parser
1407       *     ->setLinkRelationShip('nofollow')
1408       *     ->parse('"Link":http://example.com/');
1409       *
1410       * Generates:
1411       *
1412       * bc. <p><a href="http://example.com/" rel="nofollow">Link</a></p>
1413       *
1414       * @param  string|array $relationship The HTML rel attribute value
1415       * @return Parser       This instance
1416       * @since  3.6.0
1417       * @see    Parser::getLinkRelationShip()
1418       * @api
1419       */
1420  
1421      public function setLinkRelationShip($relationship)
1422      {
1423          $this->rel = (string) implode(' ', (array) $relationship);
1424          return $this;
1425      }
1426  
1427      /**
1428       * Gets the link relationship status value.
1429       *
1430       * bc. $parser = new \Netcarver\Textile\Parser();
1431       * echo $parse
1432       *     ->setLinkRelationShip('nofollow')
1433       *     ->getLinkRelationShip();
1434       *
1435       * The above outputs "nofollow".
1436       *
1437       * @return string The value
1438       * @since  3.6.0
1439       * @see    Parser::setLinkRelationShip()
1440       * @api
1441       */
1442  
1443      public function getLinkRelationShip()
1444      {
1445          return $this->rel;
1446      }
1447  
1448      /**
1449       * Enables restricted parser mode.
1450       *
1451       * This option should be enabled when parsing untrusted user input,
1452       * including comments or forum posts. When enabled, the parser escapes any
1453       * raw HTML input, ignores unsafe attributes and links only whitelisted URL
1454       * schemes.
1455       *
1456       * For instance the following malicious input:
1457       *
1458       * bc. $parser = new \Netcarver\Textile\Parser();
1459       * echo $parser
1460       *     ->setRestricted(true)
1461       *     ->parse('Innocent _looking_ "link":javacript:window.alert().');
1462       *
1463       * Returns safe, sanitized HTML with valid Textile input still parsed:
1464       *
1465       * bc. <p>Innocent <em>looking</em> &#8220;link&#8221;:javacript:window.alert().</p>
1466       *
1467       * If left disabled, the parser allows users to mix raw HTML and Textile.
1468       * Using the parser in non-restricted on untrusted input, like comments
1469       * and forum posts, will lead to XSS issues, as users will be able to use
1470       * any HTML code, JavaScript links and Textile attributes in their input.
1471       *
1472       * @param  bool   $enabled TRUE to enable, FALSE to disable
1473       * @return Parser This instance
1474       * @since  3.6.0
1475       * @see    Parser::isRestrictedModeEnabled()
1476       * @api
1477       */
1478  
1479      public function setRestricted($enabled)
1480      {
1481          if ($enabled) {
1482              $this->url_schemes = $this->restricted_url_schemes;
1483              $this->restricted = true;
1484          } else {
1485              $this->url_schemes = $this->unrestricted_url_schemes;
1486              $this->restricted = false;
1487          }
1488  
1489          return $this;
1490      }
1491  
1492      /**
1493       * Whether restricted parser mode is enabled.
1494       *
1495       * bc. $parser = new \Netcarver\Textile\Parser();
1496       * if ($parser->isRestrictedModeEnabled() === true) {
1497       *     echo 'PHP-Textile is in restricted mode.';
1498       * }
1499       *
1500       * @return bool   TRUE if enabled, FALSE otherwise
1501       * @since  3.6.0
1502       * @see    Parser::setRestricted()
1503       * @api
1504       */
1505  
1506      public function isRestrictedModeEnabled()
1507      {
1508          return (bool) $this->restricted;
1509      }
1510  
1511      /**
1512       * Enables and disables raw blocks.
1513       *
1514       * When raw blocks are enabled, any paragraph blocks wrapped in a tag
1515       * not matching Parser::$blockContent or Parser::$phrasingContent will not
1516       * be parsed, and instead is left as is.
1517       *
1518       * bc. $parser = new \Netcarver\Textile\Parser();
1519       * echo $parser
1520       *     ->setRawBlocks(true)
1521       *     ->parse('<div>A *raw* block.</div>');
1522       *
1523       * The above generates:
1524       *
1525       * bc. <div>A *raw* block.</div>
1526       *
1527       * @param  bool   $enabled TRUE to enable, FALSE to disable
1528       * @return Parser This instance
1529       * @since  3.7.0
1530       * @see    Parser::isRawBlocksEnabled()
1531       * @see    Parser::isRawBlock()
1532       * @api
1533       */
1534  
1535      public function setRawBlocks($enabled)
1536      {
1537          $this->rawBlocksEnabled = (bool) $enabled;
1538          return $this;
1539      }
1540  
1541      /**
1542       * Whether raw blocks are enabled.
1543       *
1544       * bc. $parser = new \Netcarver\Textile\Parser();
1545       * if ($parser->isRawBlocksEnabled() === true) {
1546       *     echo 'Raw blocks are enabled';
1547       * }
1548       *
1549       * @return bool TRUE if enabled, FALSE otherwise
1550       * @since  3.7.0
1551       * @see    Parser::setRawBlocks()
1552       * @api
1553       */
1554  
1555      public function isRawBlocksEnabled()
1556      {
1557          return (bool) $this->rawBlocksEnabled;
1558      }
1559  
1560      /**
1561       * Enables and disables block-level tags and formatting features.
1562       *
1563       * When disabled, block-level tags aren't rendered. This allows PHP-Textile
1564       * to operate on a single line of text, rather than blocks of text and does
1565       * not wrap the output in paragraph tags.
1566       *
1567       * bc. $parser = new \Netcarving\Textile\Parser();
1568       * echo $parser
1569       *     ->setBlockTags(false)
1570       *     ->parse('h1. Hello *strong* world!');
1571       *
1572       * The above generates:
1573       *
1574       * bc. h1. Hello <strong>strong</strong> world!
1575       *
1576       * @param  bool   $enabled TRUE to enable, FALSE to disable
1577       * @return Parser This instance
1578       * @since  3.6.0
1579       * @see    Parser::isBlockTagEnabled()
1580       * @api
1581       */
1582  
1583      public function setBlockTags($enabled)
1584      {
1585          $this->blockTagsEnabled = (bool) $enabled;
1586          return $this;
1587      }
1588  
1589      /**
1590       * Whether block-level tags are enabled and parsed.
1591       *
1592       * bc. $parser = new \Netcarving\Textile\Parser();
1593       * if ($parser->isBlockTagAllowed() === true) {
1594       *     echo 'Block tags are enabled.';
1595       * }
1596       *
1597       * @return bool TRUE if enabled, FALSE otherwise
1598       * @since  3.6.0
1599       * @see    Parser::setBlockTags()
1600       * @api
1601       */
1602  
1603      public function isBlockTagEnabled()
1604      {
1605          return (bool) $this->blockTagsEnabled;
1606      }
1607  
1608      /**
1609       * Enables and disables line-wrapping.
1610       *
1611       * If enabled, line-breaks are replaced by target document's break tag. If
1612       * disabled, input document's line-breaks are ignored. This setting can be
1613       * used if the the input document's lines are pre-wrapped. For instance,
1614       * in case the input is from CLI content, or source code documentation.
1615       *
1616       * bc. $parser = new \Netcarving\Textile\Parser();
1617       * echo $parser
1618       *     ->setLineWrap(false)
1619       *     ->parse("Hello\nworld!");
1620       *
1621       * The above generates:
1622       *
1623       * bc. <p>Hello world!</p>
1624       *
1625       * @param  bool   $enabled TRUE to enable, FALSE to disable
1626       * @return Parser This instance
1627       * @since  3.6.0
1628       * @see    Parser::isLineWrapEnabled()
1629       * @api
1630       */
1631  
1632      public function setLineWrap($enabled)
1633      {
1634          $this->lineWrapEnabled = (bool) $enabled;
1635          return $this;
1636      }
1637  
1638      /**
1639       * Whether line-wrapping is enabled.
1640       *
1641       * bc. $parser = new \Netcarving\Textile\Parser();
1642       * if ($parser->isLineWrapEnabled() === true) {
1643       *     echo 'Line-wrapping is enabled.';
1644       * }
1645       *
1646       * @return bool TRUE if enabled, FALSE otherwise
1647       * @see    Parser::setLineWrap()
1648       * @since  3.6.0
1649       * @api
1650       */
1651  
1652      public function isLineWrapEnabled()
1653      {
1654          return (bool) $this->lineWrapEnabled;
1655      }
1656  
1657      /**
1658       * Sets a substitution symbol.
1659       *
1660       * This method lets you to redefine a substitution symbol. The following
1661       * sets the 'half' glyph:
1662       *
1663       * bc. $parser = new \Netcarver\Textile\Parser();
1664       * echo $parser
1665       *     ->setSymbol('half', '1&#8260;2')
1666       *     ->parse('Hello [1/2] World!');
1667       *
1668       * Generates:
1669       *
1670       * bc. <p>Hello 1&#⁄2 World!</p>
1671       *
1672       * Symbol can be set to FALSE to disable it:
1673       *
1674       * bc. $parser = new \Netcarver\Textile\Parser();
1675       * $parser->setSymbol('dimension', false);
1676       *
1677       * See Parser::getSymbol() to find out all available symbols.
1678       *
1679       * @param  string      $name  Name of the symbol to assign a new value to
1680       * @param  string|bool $value New value for the symbol, or FALSE to disable
1681       * @return Parser      This instance
1682       * @see    Parser::getSymbol()
1683       * @api
1684       */
1685  
1686      public function setSymbol($name, $value)
1687      {
1688          if ($value !== false) {
1689              $value = (string) $value;
1690          }
1691  
1692          $this->symbols[(string) $name] = $value;
1693          $this->rebuild_glyphs = true;
1694          return $this;
1695      }
1696  
1697      /**
1698       * Gets a symbol definitions.
1699       *
1700       * This method gets a symbol definition by name, or the full symbol table
1701       * as an array.
1702       *
1703       * bc. $parser = new \Netcarver\Textile\Parser();
1704       * echo $parser->getSymbol('dimension');
1705       *
1706       * To get all available symbol definitions:
1707       *
1708       * bc. $parser = new \Netcarver\Textile\Parser();
1709       * print_r($parser->getSymbol());
1710       *
1711       * @param  string|null  $name The name of the symbol, or NULL if requesting the symbol table
1712       * @return array|string The symbol table or the requested symbol
1713       * @throws \InvalidArgumentException
1714       * @see    Parser::setSymbol()
1715       * @api
1716       */
1717  
1718      public function getSymbol($name = null)
1719      {
1720          if ($name !== null) {
1721              if (isset($this->symbols[$name])) {
1722                  return $this->symbols[$name];
1723              }
1724  
1725              throw new \InvalidArgumentException('The specified name does not match any symbols.');
1726          }
1727  
1728          return $this->symbols;
1729      }
1730  
1731      /**
1732       * Sets base relative image prefix.
1733       *
1734       * The given string is used to prefix relative image paths, usually an
1735       * absolute HTTP address pointing a the site's image, or upload, directory.
1736       * PHP-Textile to convert relative paths to absolute, or prefixed paths.
1737       *
1738       * bc. $parser = new \Netcarver\Textile\Parser();
1739       * $parser->setImagePrefix('https://static.example.com/images/');
1740       *
1741       * @param  string $prefix The prefix
1742       * @return Parser This instance
1743       * @since  3.7.0
1744       * @see    Parser::getImagePrefix()
1745       * @api
1746       */
1747  
1748      public function setImagePrefix($prefix)
1749      {
1750          $this->relImagePrefix = (string) $prefix;
1751          return $this;
1752      }
1753  
1754      /**
1755       * Gets base relative image prefix.
1756       *
1757       * bc. $parser = new \Netcarver\Textile\Parser();
1758       * echo $parser->getImagePrefix();
1759       *
1760       * @return string The prefix
1761       * @since  3.7.0
1762       * @see    Parser::setImagePrefix()
1763       * @api
1764       */
1765  
1766      public function getImagePrefix()
1767      {
1768          return (string) $this->relImagePrefix;
1769      }
1770  
1771      /**
1772       * Sets base relative link prefix.
1773       *
1774       * The given string is used to prefix relative link paths. This allows
1775       * PHP-Textile convert relative paths to absolute, or prefixed, links.
1776       *
1777       * bc. $parser = new \Netcarver\Textile\Parser();
1778       * $parser->setLinkPrefix('https://example.com/');
1779       *
1780       * @param  string $prefix The prefix
1781       * @return Parser This instance
1782       * @since  3.7.0
1783       * @see    Parser::getLinkPrefix()
1784       * @api
1785       */
1786  
1787      public function setLinkPrefix($prefix)
1788      {
1789          $this->relLinkPrefix = (string) $prefix;
1790          return $this;
1791      }
1792  
1793      /**
1794       * Gets base relative link prefix.
1795       *
1796       * bc. $parser = new \Netcarver\Textile\Parser();
1797       * echo $parser->getLinkPrefix();
1798       *
1799       * @return string The prefix
1800       * @since  3.7.0
1801       * @see    Parser::setLinkPrefix()
1802       * @api
1803       */
1804  
1805      public function getLinkPrefix()
1806      {
1807          return (string) $this->relLinkPrefix;
1808      }
1809  
1810      /**
1811       * Sets base relative image and link directory path.
1812       *
1813       * This is used when Textile is supplied with a relative image or link path.
1814       * Allows client systems to have PHP-Textile convert relative paths to
1815       * absolute or prefixed paths. This method is used to set that base path,
1816       * usually an absolute HTTP address pointing to a directory. Note that
1817       * despite its name it applies to both links and images.
1818       *
1819       * bc. $parser = new \Netcarver\Textile\Parser();
1820       * $parser->setRelativeImagePrefix('https://example.com/');
1821       *
1822       * @param  string $prefix The string to prefix all relative image paths with
1823       * @return Parser This instance
1824       * @deprecated in 3.7.0
1825       * @see Parser::setImagePrefix
1826       * @see Parser::setLinkPrefix
1827       * @api
1828       */
1829  
1830      public function setRelativeImagePrefix($prefix = '')
1831      {
1832          trigger_error(
1833              'Parser::setRelativeImagePrefix() is deprecated.'.
1834              'Use Parser::setImagePrefix() and Parser::setLinkPrefix() instead.',
1835              E_USER_DEPRECATED
1836          );
1837  
1838          $this->relativeImagePrefix = $prefix;
1839          return $this;
1840      }
1841  
1842      /**
1843       * Enables dimensionless images.
1844       *
1845       * If enabled, image width and height attributes will not be included in
1846       * rendered image tags. Normally, PHP-Textile will add width and height
1847       * to images linked with a local relative path, as long as the image file
1848       * can be accessed.
1849       *
1850       * bc. $parser = new \Netcarver\Textile\Parser();
1851       * echo $parser
1852       *     ->setDimensionlessImages(true)
1853       *     ->parse('!image.jpg!');
1854       *
1855       * @param  bool   $dimensionless TRUE to disable image dimensions, FALSE to enable
1856       * @return Parser This instance
1857       * @see    Parser::getDimensionlessImages()
1858       * @api
1859       */
1860  
1861      public function setDimensionlessImages($dimensionless = true)
1862      {
1863          $this->dimensionless_images = (bool) $dimensionless;
1864          return $this;
1865      }
1866  
1867      /**
1868       * Whether dimensionless images are enabled.
1869       *
1870       * bc. $parser = new \Netcarver\Textile\Parser();
1871       * if ($parser->getDimensionlessImages() === true) {
1872       *     echo 'Images do not get dimensions.';
1873       * }
1874       *
1875       * @return bool TRUE if images will not get dimensions, FALSE otherwise
1876       * @see    Parser::setDimensionlessImages()
1877       * @api
1878       */
1879  
1880      public function getDimensionlessImages()
1881      {
1882          return (bool) $this->dimensionless_images;
1883      }
1884  
1885      /**
1886       * Gets PHP-Textile version number.
1887       *
1888       * bc. $parser = new \Netcarver\Textile\Parser();
1889       * echo $parser->getVersion();
1890       *
1891       * @return string Version number
1892       * @api
1893       */
1894  
1895      public function getVersion()
1896      {
1897          return $this->ver;
1898      }
1899  
1900      /**
1901       * Encodes the given text.
1902       *
1903       * bc. $parser = new \Netcarver\Textile\Parser();
1904       * $parser->textileEncode('Some content to encode.');
1905       *
1906       * @param  string $text The text to be encoded
1907       * @return string The encoded text
1908       * @api
1909       */
1910  
1911      public function textileEncode($text)
1912      {
1913          return (string)preg_replace('/&(?!(?:[a-z][a-z\d]*|#(?:\d+|x[a-f\d]+));)/i', '&amp;', $text);
1914      }
1915  
1916      /**
1917       * Parses the given Textile input according to the previously set options.
1918       *
1919       * The parser's features can be changed by using the various public setter
1920       * methods this class has. The most basic use case is:
1921       *
1922       * bc. $parser = new \Netcarver\Textile\Parser();
1923       * echo $parser->parse('h1. Hello World!');
1924       *
1925       * The above parses trusted input in full-feature mode, generating:
1926       *
1927       * bc. <h1>Hello World!</h1>
1928       *
1929       * Additionally the parser can be run in safe, restricted mode using the
1930       * Parser::setRestricted() method.
1931       *
1932       * bc. $parser = new \Netcarver\Textile\Parser();
1933       * echo $parser
1934       *     ->setRestricted(true)
1935       *     ->parse('h1. Hello World!');
1936       *
1937       * This enables restricted mode and allows safe parsing of untrusted input.
1938       * PHP-Textile will disable unsafe attributes, links and escapes any raw
1939       * HTML input. This option should be enabled when parsing untrusted user
1940       * input.
1941       *
1942       * If restricted mode is disabled, the parser allows users to mix raw HTML
1943       * and Textile.
1944       *
1945       * @param  string $text The Textile input to parse
1946       * @return string Parsed Textile input
1947       * @since  3.6.0
1948       * @api
1949       */
1950  
1951      public function parse($text)
1952      {
1953          // Reset the transient parser state before handling new input.
1954          $this->prepare();
1955          $input = (string) $text;
1956  
1957          // Restricted mode escapes any raw HTML up front.
1958          if ($this->isRestrictedModeEnabled()) {
1959              $input = $this->encodeHTML($input, false);
1960          }
1961  
1962          $input = $this->cleanUniqueTokens($this->cleanWhiteSpace($input));
1963  
1964          if (!$this->isBlockTagEnabled()) {
1965              // Without block tags only inline processing runs: the quoted-quote
1966              // glyph, then spans (em, strong, sup, sub, del etc.), then glyphs.
1967              $input = $this->glyphQuotedQuote($input."\n\n");
1968              $input = $this->spans($input);
1969              $input = $this->glyphs($input);
1970          } elseif ($this->isLiteModeEnabled()) {
1971              // Lite mode whitelists block quotes and paragraphs only.
1972              $this->blocktag_whitelist = array('bq', 'p');
1973              $input = $this->blocks($input."\n\n");
1974          } else {
1975              $this->blocktag_whitelist = array(
1976                  'bq', 'p', 'bc', 'notextile', 'pre', 'h[1-6]',
1977                  'fn'.$this->regex_snippets['digit'].'+',
1978                  '###',
1979              );
1980              $input = $this->placeNoteLists($this->blocks($input));
1981          }
1982  
1983          // Post-processing passes, kept in their original order.
1984          $output = $this->retrieve($input);
1985          $output = $this->replaceGlyphs($output);
1986          $output = $this->retrieveTags($output);
1987          $output = $this->retrieveURLs($output);
1988  
1989          // Put a newline after every generated line break tag.
1990          return str_replace("<br />", "<br />\n", $output);
1991      }
2004  
2005      /**
2006       * Parses the given Textile input in un-restricted mode.
2007       *
2008       * This method is deprecated, use Parser::parse() method instead.
2009       * This method is equivalent to:
2010       *
2011       * bc. $parser = new \Netcarver\Textile\Parser();
2012       * echo $parser->parse('h1. Hello World!');
2013       *
2014       * Additional arguments can be passed with setter methods:
2015       *
2016       * bc. $parser = new \Netcarver\Textile\Parser();
2017       * echo $parser
2018       *     ->setLite(true)
2019       *     ->setImages(true)
2020       *     ->setLinkRelationShip('nofollow')
2021       *     ->parse('h1. Hello World!');
2022       *
2023       * @param  string $text    The Textile input to parse
2024       * @param  bool   $lite    Switch to lite mode
2025       * @param  bool   $encode  Encode input and return
2026       * @param  bool   $noimage Disables images
2027       * @param  bool   $strict  This argument is ignored
2028       * @param  string $rel     Relationship attribute applied to generated links
2029       * @return string Parsed $text
2030       * @see    Parser::parse()
2031       * @deprecated in 3.6.0
2032       * @api
2033       */
2034  
2035      public function textileThis($text, $lite = false, $encode = false, $noimage = false, $strict = false, $rel = '')
2036      {
2037          if (!$encode) {
2038              trigger_error(
2039                  'Parser::textileThis() is deprecated. Use Parser::parse() instead.',
2040                  E_USER_DEPRECATED
2041              );
2042              // Unrestricted parse with block tags enabled; $strict is never read.
2043              $parser = $this
2044                  ->setRestricted(false)
2045                  ->setLite($lite)
2046                  ->setBlockTags(true)
2047                  ->setImages(!$noimage)
2048                  ->setLinkRelationShip($rel);
2049  
2050              return $parser->parse($text);
2051          }
2052  
2053          trigger_error(
2054              '$encode argument is deprecated. Use Parser::textileEncode() instead.',
2055              E_USER_DEPRECATED
2056          );
2057          return $this->textileEncode($text);
2058      }
2059  
2060      /**
2061       * Parses the given Textile input in restricted mode.
2062       *
2063       * This method is deprecated, use Parser::parse() method with
2064       * Parser::setRestricted() and Parser::setLite() enabled, and
2065       * Parser::setImages() disabled.
2066       *
2067       * This method's defaults are identical to:
2068       *
2069       * bc. $parser = new \Netcarver\Textile\Parser();
2070       * echo $parser
2071       *     ->setRestricted(true)
2072       *     ->setLite(true)
2073       *     ->setImages(false)
2074       *     ->setLinkRelationShip('nofollow')
2075       *     ->parse('h1. Hello World!');
2076       *
2077       * As in the above, restricted mode should be used when parsing any
2078       * untrusted user input, including comments or forum posts.
2079       *
2080       * @param  string $text    The Textile input to parse
2081       * @param  bool   $lite    Controls lite mode, which restricts the allowed formatting
2082       * @param  bool   $noimage Disallow images
2083       * @param  string $rel     Relationship attribute applied to generated links
2084       * @return string Parsed input
2085       * @see    Parser::setRestricted()
2086       * @see    Parser::setLite()
2087       * @see    Parser::setImages()
2088       * @see    Parser::setLinkRelationShip()
2089       * @see    Parser::parse()
2090       * @deprecated in 3.6.0
2091       * @api
2092       */
2093  
2094      public function textileRestricted($text, $lite = true, $noimage = true, $rel = 'nofollow')
2095      {
2096          $notice = 'Parser::textileRestricted() is deprecated. Use Parser::parse() with Parser::setRestricted() instead.';
2097          trigger_error($notice, E_USER_DEPRECATED);
2098  
2099          // Force restricted mode, then apply the caller's options in the usual setter order.
2100          $parser = $this->setRestricted(true)->setLite($lite)->setBlockTags(true);
2101          $parser = $parser->setImages(!$noimage)->setLinkRelationShip($rel);
2102  
2103          return $parser->parse($text);
2104      }
2109  
2110      /**
2111       * Parses Textile syntax.
2112       *
2113       * This method performs common parse actions.
2114       *
2115       * @param  string $text The input to parse
2116       * @param  bool   $lite Enables lite mode
2117       * @return string Parsed input
2118       * @deprecated in 3.6.0
2119       */
2120  
2121      protected function textileCommon($text, $lite)
2122      {
2123          trigger_error('Parser::textileCommon() is deprecated.', E_USER_DEPRECATED);
2124          $parser = $this->setLite($lite);
2125          return $parser->parse($text);
2126      }
2126  
2127      /**
2128       * Prepares the glyph patterns from the symbol table.
2129       *
2130       * @see Parser::setSymbol()
2131       * @see Parser::getSymbol()
2132       */
2133  
2134      protected function prepGlyphs()
2135      {
2136          if ($this->rebuild_glyphs === false) {
2137              return; // Patterns from the previous build are still valid.
2138          }
2139  
2140          $pnc = '[[:punct:]]';
2141          $cur = '';
2142          // With a 'cur' snippet (presumably currency characters) defined, build an optional prefix for the dimension look-ahead.
2143          if ($this->regex_snippets['cur']) {
2144              $cur = '(?:['.$this->regex_snippets['cur'].']'.$this->regex_snippets['space'].'*)?';
2145          }
2146          // Start from empty lists; each pattern below is pushed together with its replacement.
2147          $this->glyph_search = array();
2148          $this->glyph_replace = array();
2149  
2150          // Dimension sign: an "x" between two numbers, e.g. 3 x 4.
2151          if ($this->symbols['dimension'] !== false) {
2152              $this->glyph_search[] = '/(?<=\b|x)([0-9]++[\])]?[\'"]? ?)[x]( ?[\[(]?)(?=[+-]?'.$cur.'[0-9]*\.?[0-9]++)/i'.
2153              $this->regex_snippets['mod'];
2154              $this->glyph_replace[] = '$1'.$this->symbols['dimension'].'$2';
2155          }
2156  
2157          // Apostrophe: a quote between a 'wrd' match (or a closing parenthesis) and another 'wrd' match.
2158          if ($this->symbols['apostrophe'] !== false) {
2159              $this->glyph_search[] = '/('.$this->regex_snippets['wrd'].'|\))\''.
2160              '('.$this->regex_snippets['wrd'].')/'.$this->regex_snippets['mod'];
2161              $this->glyph_replace[] = '$1'.$this->symbols['apostrophe'].'$2';
2162  
2163              // Back in '88/the '90s but not in his '90s', '1', '1.' '10m' or '5.png'
2164              $this->glyph_search[] = '/('.$this->regex_snippets['space'].')\''.
2165              '(\d+'.$this->regex_snippets['wrd'].'?)\b(?![.]?['.$this->regex_snippets['wrd'].']*?\')/'.
2166              $this->regex_snippets['mod'];
2167              $this->glyph_replace[] = '$1'.$this->symbols['apostrophe'].'$2';
2168          }
2169  
2170          // Single open following open bracket: (, [ or { directly before the quote, non-whitespace after it.
2171          if ($this->symbols['quote_single_open'] !== false) {
2172              $this->glyph_search[] = "/([([{])'(?=\S)/".$this->regex_snippets['mod'];
2173              $this->glyph_replace[] = '$1'.$this->symbols['quote_single_open'];
2174          }
2175  
2176          // Single closing: a quote after non-whitespace, followed by space, punctuation, "<" or the end.
2177          if ($this->symbols['quote_single_close'] !== false) {
2178              $this->glyph_search[] = '/(\S)\'(?='.$this->regex_snippets['space'].'|'.$pnc.'|<|$)/'.
2179                  $this->regex_snippets['mod'];
2180              $this->glyph_replace[] = '$1'.$this->symbols['quote_single_close'];
2181          }
2182  
2183          // Default single opening: any single quote still left (assuming patterns are applied in array order).
2184          if ($this->symbols['quote_single_open'] !== false) {
2185              $this->glyph_search[] = "/'/";
2186              $this->glyph_replace[] = $this->symbols['quote_single_open'];
2187          }
2188  
2189          // Double open following an open bracket. Allows things like Hello ["(Mum) & dad"]
2190          if ($this->symbols['quote_double_open'] !== false) {
2191              $this->glyph_search[] = '/([([{])"(?=\S)/'.$this->regex_snippets['mod'];
2192              $this->glyph_replace[] = '$1'.$this->symbols['quote_double_open'];
2193          }
2194  
2195          // Double closing: a quote after non-whitespace, followed by space, punctuation, "<" or the end.
2196          if ($this->symbols['quote_double_close'] !== false) {
2197              $this->glyph_search[] = '/(\S)"(?='.$this->regex_snippets['space'].'|'.$pnc.'|<|$)/'.
2198                  $this->regex_snippets['mod'];
2199              $this->glyph_replace[] = '$1'.$this->symbols['quote_double_close'];
2200          }
2201  
2202          // Default double opening: any double quote still left (assuming patterns are applied in array order).
2203          if ($this->symbols['quote_double_open'] !== false) {
2204              $this->glyph_search[] = '/"/';
2205              $this->glyph_replace[] = $this->symbols['quote_double_open'];
2206          }
2207          // A null acronym symbol selects a default template based on the document type.
2208          if ($this->symbols['acronym'] === null) {
2209              if ($this->getDocumentType() === 'html5') {
2210                  $acronym = '<abbr title="{title}">{content}</abbr>';
2211              } else {
2212                  $acronym = '<acronym title="{title}">{content}</acronym>';
2213              }
2214          } else {
2215              $acronym = $this->symbols['acronym'];
2216          }
2217  
2218          // 3+ uppercase acronym followed directly by a parenthesised title, e.g. ABC(title).
2219          if ($acronym !== false) {
2220              $this->glyph_search[] = '/\b(['.$this->regex_snippets['abr'].']['.
2221                  $this->regex_snippets['acr'].']{2,})\b(?:[(]([^)]*)[)])/'.$this->regex_snippets['mod'];
2222              $this->glyph_replace[] = $this->replaceMarkers($acronym, array(
2223                  'title' => '$2',
2224                  'content' => '$1',
2225              ));
2226          }
2227  
2228          // 3+ uppercase; the matched run is handed to the caps template as uid:glyph:$2 (presumably for its content marker).
2229          if ($this->symbols['caps'] !== false) {
2230              $this->glyph_search[] = '/('.$this->regex_snippets['space'].'|^|[>(;-])'.
2231                  '(['.$this->regex_snippets['abr'].']{3,})'.
2232                  '(['.$this->regex_snippets['nab'].']*)(?='.
2233                  $this->regex_snippets['space'].'|'.$pnc.'|<|$)'.
2234                  '(?=[^">]*?(<|$))/'.$this->regex_snippets['mod'];
2235              $this->glyph_replace[] = $this->replaceMarkers('$1'.$this->symbols['caps'].'$3', array(
2236                  'content' => $this->uid.':glyph:$2',
2237              ));
2238          }
2239  
2240          // Ellipsis: three dots, keeping the character before them when it is not a dot.
2241          if ($this->symbols['ellipsis'] !== false) {
2242              $this->glyph_search[] = '/([^.]?)\.{3}/';
2243              $this->glyph_replace[] = '$1'.$this->symbols['ellipsis'];
2244          }
2245  
2246          // em dash: two consecutive hyphens
2247          if ($this->symbols['emdash'] !== false) {
2248              $this->glyph_search[] = '/--/';
2249              $this->glyph_replace[] = $this->symbols['emdash'];
2250          }
2251  
2252          // en dash: a hyphen with a space on each side; both spaces are kept
2253          if ($this->symbols['endash'] !== false) {
2254              $this->glyph_search[] = '/ - /';
2255              $this->glyph_replace[] = ' '.$this->symbols['endash'].' ';
2256          }
2257  
2258          // Trademark: TM in parentheses or square brackets, case-insensitive
2259          if ($this->symbols['trademark'] !== false) {
2260              $this->glyph_search[] = '/(\b ?|'.$this->regex_snippets['space'].'|^)[([]TM[])]/i'.
2261                  $this->regex_snippets['mod'];
2262              $this->glyph_replace[] = '$1'.$this->symbols['trademark'];
2263          }
2264  
2265          // Registered: R in parentheses or square brackets, case-insensitive
2266          if ($this->symbols['registered'] !== false) {
2267              $this->glyph_search[] = '/(\b ?|'.$this->regex_snippets['space'].'|^)[([]R[])]/i'.
2268                  $this->regex_snippets['mod'];
2269              $this->glyph_replace[] = '$1'.$this->symbols['registered'];
2270          }
2271  
2272          // Copyright: C in parentheses or square brackets, case-insensitive
2273          if ($this->symbols['copyright'] !== false) {
2274              $this->glyph_search[] = '/(\b ?|'.$this->regex_snippets['space'].'|^)[([]C[])]/i'.
2275                  $this->regex_snippets['mod'];
2276              $this->glyph_replace[] = '$1'.$this->symbols['copyright'];
2277          }
2278  
2279          // 1/4 in parentheses or square brackets
2280          if ($this->symbols['quarter'] !== false) {
2281              $this->glyph_search[] = '/[([]1\/4[])]/';
2282              $this->glyph_replace[] = $this->symbols['quarter'];
2283          }
2284  
2285          // 1/2 in parentheses or square brackets
2286          if ($this->symbols['half'] !== false) {
2287              $this->glyph_search[] = '/[([]1\/2[])]/';
2288              $this->glyph_replace[] = $this->symbols['half'];
2289          }
2290  
2291          // 3/4 in parentheses or square brackets
2292          if ($this->symbols['threequarters'] !== false) {
2293              $this->glyph_search[] = '/[([]3\/4[])]/';
2294              $this->glyph_replace[] = $this->symbols['threequarters'];
2295          }
2296  
2297          // Degrees -- that's a small 'oh' in parentheses or square brackets
2298          if ($this->symbols['degrees'] !== false) {
2299              $this->glyph_search[] = '/[([]o[])]/';
2300              $this->glyph_replace[] = $this->symbols['degrees'];
2301          }
2302  
2303          // Plus minus: +/- in parentheses or square brackets
2304          if ($this->symbols['plusminus'] !== false) {
2305              $this->glyph_search[] = '/[([]\+\/-[])]/';
2306              $this->glyph_replace[] = $this->symbols['plusminus'];
2307          }
2308  
2309          // No need to rebuild next run unless a symbol is redefined
2310          $this->rebuild_glyphs = false;
2311      }
2312  
2313      /**
2314       * Gets the maximum allowed link index.
2315       *
2316       * @return int Maximum link index
2317       * @since  3.5.5
2318       */
2319  
2320      protected function getMaxLinkIndex()
2321      {
2322          return 1000 * 1000; // One million.
2323      }
2324  
2325      /**
2326       * Prepares the parser for parsing.
2327       *
2328       * This method prepares the transient internal state of
2329       * Textile parser in preparation for parsing a new document.
2330       *
2331       * @param  bool|null   $lite    Controls lite mode
2332       * @param  bool|null   $noimage Disallow images
2333       * @param  string|null $rel     A relationship attribute applied to links
2334       */
2335  
2336      protected function prepare($lite = null, $noimage = null, $rel = null)
2337      {
2338          if ($this->linkIndex >= $this->getMaxLinkIndex()) {
2339              $this->linkPrefix .= '-';
2340              $this->linkIndex = 1;
2341          }
2342          // Reset the per-document state so nothing carries over from an earlier parse.
2343          $this->unreferencedNotes = array();
2344          $this->notelist_cache = array();
2345          $this->notes = array();
2346          $this->urlrefs = array();
2347          $this->shelf = array();
2348          $this->fn = array();
2349          $this->span_depth = 0;
2350          $this->refIndex = 1;
2351          $this->refCache = array();
2352          $this->note_index = 1;
2353          // The arguments are deprecated: each one still takes effect but raises a notice.
2354          if ($lite !== null) {
2355              trigger_error(
2356                  '$lite argument is deprecated. Use Parser::setLite() instead.',
2357                  E_USER_DEPRECATED
2358              );
2359  
2360              $this->setLite($lite);
2361          }
2362  
2363          if ($noimage !== null) {
2364              trigger_error(
2365                  '$noimage argument is deprecated. Use Parser::setImages() instead.',
2366                  E_USER_DEPRECATED
2367              );
2368  
2369              $this->setImages(!$noimage);
2370          }
2371  
2372          if ($rel !== null) {
2373              trigger_error(
2374                  '$rel argument is deprecated. Use Parser::setLinkRelationShip() instead.',
2375                  E_USER_DEPRECATED
2376              );
2377              // The notice above names this setter, which is the one applied here.
2378              $this->setLinkRelationShip($rel);
2379          }
2380          // Build the tag-classification patterns only while they are still unset.
2381          if ($this->patterns === null) {
2382              $block = implode('|', $this->blockContent);
2383              $divider = implode('|', $this->dividerContent);
2384              $phrasing = implode('|', $this->phrasingContent);
2385  
2386              $this->patterns = array(
2387                  'block' => '/^(?:'.$block.')$/i',
2388                  'contained' => '/^<\/?(?P<open>[^\s<>\/]+)(?:\s.*|\/?>.*|)>$/si',
2389                  'divider' => '/^(?:<\/?('.$divider.')(?:\s[^<>]*?|\/?)>(?:<\/\1\s*?>)?)+$/si',
2390                  'phrasing' => '/^(?:'.$phrasing.')$/i',
2391                  'wrapped' => '/^<\/?(?P<open>[^\s<>\/]+)[^<>]*?>(?:.*<\/\1\s*?>)?$/si',
2392                  'unwrappable' => '/<\/?(?:'.$block.')(?:\s[^<>]*?|\/?)>/si',
2393              );
2394          }
2395          // Refresh the glyph search/replace patterns; prepGlyphs() returns early when no rebuild is flagged.
2396          $this->prepGlyphs();
2397      }
2398  
2399      /**
2400       * Cleans a HTML attribute value.
2401       *
2402       * This method checks for presence of URL encoding in the value.
2403       * If the number of decoding passes reaches the threshold,
2404       * the input is discarded. Otherwise the encoded
2405       * instances are decoded.
2406       *
2407       * This method also strips any ", ' and = characters
2408       * from the given value. This method does not guarantee
2409       * valid HTML or full sanitization.
2410       *
2411       * @param  string $in The input string
2412       * @return string Cleaned string
2413       */
2414  
2415      protected function cleanAttribs($in)
2416      {
2417          $limit = 3;
2418          $passes = 0;
2419          $decoded = $in;
2420  
2421          // Keep URL-decoding until a pass leaves the length unchanged,
2422          // or the pass limit is reached.
2423          do {
2424              $lengthBefore = strlen($decoded);
2425              $decoded = rawurldecode($decoded);
2426              $passes++;
2427          } while (strlen($decoded) != $lengthBefore && $passes < $limit);
2428  
2429          if ($passes === $limit) {
2430              // Reaching the limit marks the input as tainted: consume it.
2431              return '';
2432          }
2433  
2434          return str_replace(array('"', "'", '='), '', $decoded);
2435      }
2439  
2440      /**
2441       * Constructs a HTML tag from an object.
2442       *
2443       * This is a helper method that creates a new
2444       * instance of \Netcarver\Textile\Tag.
2445       *
2446       * @param  string $name        The HTML element name
2447       * @param  array  $atts        HTML attributes applied to the tag
2448       * @param  bool   $selfclosing Determines if the tag should be selfclosing
2449       * @return Tag
2450       */
2451  
2452      protected function newTag($name, $atts, $selfclosing = true)
2453      {
2454          $tag = new Tag($name, $atts, $selfclosing);
2455          return $tag;
2456      }
2456  
2457      /**
2458       * Parses Textile attributes.
2459       *
2460       * @param  string $in         The Textile attribute string to be parsed
2461       * @param  string $element    Focus the routine to interpret the attributes as applying to a specific HTML tag
2462       * @param  bool   $include_id If FALSE, IDs are not included in the attribute list
2463       * @param  string $autoclass  Additional classes applied to the output
2464       * @return string HTML attribute list
2465       * @see    Parser::parseAttribsToArray()
2466       */
2467  
2468      protected function parseAttribs($in, $element = '', $include_id = true, $autoclass = '')
2469      {
2470          return $this->formatAttributeString(
2471              $this->parseAttribsToArray($in, $element, $include_id, $autoclass)
2472          );
2473      }
2474  
2475      /**
2476       * Converts an array of named attribute => value mappings to a string.
2477       *
2478       * @param array $attribute_array
2479       * @return string
2480       */
2481  
2482      protected function formatAttributeString(array $attribute_array)
2483      {
2484          $pairs = array();
2485  
2486          // Each attribute renders as ' name="value"', leading space included.
2487          foreach ($attribute_array as $name => $value) {
2488              $pairs[] = ' '.$name.'="'.$value.'"';
2489          }
2490  
2491          return implode('', $pairs);
2492      }
2494  
2495      /**
2496       * Parses Textile attributes into an array.
2497       *
2498       * @param  string $in         The Textile attribute string to be parsed
2499       * @param  string $element    Focus the routine to interpret the attributes as applying to a specific HTML tag
2500       * @param  bool   $include_id If FALSE, IDs are not included in the attribute list
2501       * @param  string $autoclass  Additional classes applied to the output
2502       * @return array  HTML attributes as key => value mappings
2503       * @see    Parser::parseAttribs()
2504       */
2505  
2506      protected function parseAttribsToArray($in, $element = '', $include_id = true, $autoclass = '')
2507      {
2508          $style = array();
2509          $class = '';
2510          $lang = '';
2511          $colspan = '';
2512          $rowspan = '';
2513          $span = '';
2514          $width = '';
2515          $id = '';
2516          $matched = $in;
2517          // Table cells only: a backslash plus digits is the colspan, a slash plus digits the rowspan.
2518          if ($element == 'td') {
2519              if (preg_match("/\\\\([0-9]+)/", $matched, $csp)) {
2520                  $colspan = $csp[1];
2521              }
2522  
2523              if (preg_match("/\/([0-9]+)/", $matched, $rsp)) {
2524                  $rowspan = $rsp[1];
2525              }
2526          }
2527          // Cells and rows may begin with a vertical-alignment marker.
2528          if ($element == 'td' or $element == 'tr') {
2529              if (preg_match("/^($this->vlgn)/", $matched, $vert)) {
2530                  $style[] = "vertical-align:" . $this->vAlign($vert[1]);
2531              }
2532          }
2533          // {...} carries raw CSS: it is cleaned, kept if non-empty, and removed from $matched.
2534          if (preg_match("/\{([^}]*)\}/", $matched, $sty)) {
2535              if ($sty[1] = $this->cleanAttribs($sty[1])) {
2536                  $style[] = rtrim($sty[1], ';');
2537              }
2538  
2539              $matched = str_replace($sty[0], '', $matched);
2540          }
2541          // [...] carries a language: any [a-zA-Z0-9_-]+ token for 'code', otherwise xx or xx-YY / xx_YY.
2542          if (preg_match("/\[([^]]+)\]/U", $matched, $lng)) {
2543              // Consume entire lang block -- valid or invalid.
2544              $matched = str_replace($lng[0], '', $matched);
2545              if ($element === 'code' && preg_match("/\[([a-zA-Z0-9_-]+)\]/U", $lng[0], $lng1)) {
2546                  $lang = $lng1[1];
2547              } elseif (preg_match("/\[([a-zA-Z]{2}(?:[\-\_][a-zA-Z]{2})?)\]/U", $lng[0], $lng2)) {
2548                  $lang = $lng2[1];
2549              }
2550          }
2551          // (...) carries the class and/or a #id.
2552          if (preg_match("/\(([^()]+)\)/U", $matched, $cls)) {
2553              $class_regex = "/^([-a-zA-Z 0-9_\.]*)$/";
2554  
2555              // Consume entire class block -- valid or invalid.
2556              $matched = str_replace($cls[0], '', $matched);
2557  
2558              // Only allow a restricted subset of the CSS standard characters for classes/ids.
2559              // No encoding markers allowed.
2560              if (preg_match("/\(([-a-zA-Z 0-9_\.\:\#]+)\)/U", $cls[0], $cls)) {
2561                  $hashpos = strpos($cls[1], '#');
2562                  // If a textile class block attribute was found with a '#' in it
2563                  // split it into the css class and css id...
2564                  if (false !== $hashpos) {
2565                      if (preg_match("/#([-a-zA-Z0-9_\.\:]*)$/", substr($cls[1], $hashpos), $ids)) {
2566                          $id = $ids[1];
2567                      }
2568  
2569                      if (preg_match($class_regex, substr($cls[1], 0, $hashpos), $ids)) {
2570                          $class = $ids[1];
2571                      }
2572                  } else {
2573                      if (preg_match($class_regex, $cls[1], $ids)) {
2574                          $class = $ids[1];
2575                      }
2576                  }
2577              }
2578          }
2579          // A remaining run of '(' adds left padding: one em per parenthesis.
2580          if (preg_match("/([(]+)/", $matched, $pl)) {
2581              $style[] = "padding-left:" . strlen($pl[1]) . "em";
2582              $matched = str_replace($pl[0], '', $matched);
2583          }
2584          // A remaining run of ')' adds right padding in the same way.
2585          if (preg_match("/([)]+)/", $matched, $pr)) {
2586              $style[] = "padding-right:" . strlen($pr[1]) . "em";
2587              $matched = str_replace($pr[0], '', $matched);
2588          }
2589          // Horizontal-alignment marker, matched anywhere in what is left.
2590          if (preg_match("/($this->hlgn)/", $matched, $horiz)) {
2591              $style[] = "text-align:" . $this->hAlign($horiz[1]);
2592          }
2593          // Column definitions: an optional backslash-prefixed span, then an optional numeric width.
2594          if ($element == 'col') {
2595              if (preg_match("/(?:\\\\([0-9]+))?{$this->regex_snippets['space']}*([0-9]+)?/", $matched, $csp)) {
2596                  $span = isset($csp[1]) ? $csp[1] : '';
2597                  $width = isset($csp[2]) ? $csp[2] : '';
2598              }
2599          }
2600          // Restricted mode outputs only the automatic class and the language; everything else parsed above is dropped.
2601          if ($this->isRestrictedModeEnabled()) {
2602              $o = array();
2603              $class = trim($autoclass);
2604  
2605              if ($class) {
2606                  $o['class'] = $this->cleanAttribs($class);
2607              }
2608  
2609              if ($lang) {
2610                  $o['lang'] = $this->cleanAttribs($lang);
2611              }
2612  
2613              ksort($o);
2614              return $o;
2615          } else {
2616              $class = trim($class . ' ' . $autoclass);
2617          }
2618          // Unrestricted mode: emit every attribute that was found, each passed through cleanAttribs() except style.
2619          $o = array();
2620  
2621          if ($class) {
2622              $o['class'] = $this->cleanAttribs($class);
2623          }
2624  
2625          if ($colspan) {
2626              $o['colspan'] = $this->cleanAttribs($colspan);
2627          }
2628  
2629          if ($id && $include_id) {
2630              $o['id'] = $this->cleanAttribs($id);
2631          }
2632  
2633          if ($lang) {
2634              $o['lang'] = $this->cleanAttribs($lang);
2635          }
2636  
2637          if ($rowspan) {
2638              $o['rowspan'] = $this->cleanAttribs($rowspan);
2639          }
2640  
2641          if ($span) {
2642              $o['span'] = $this->cleanAttribs($span);
2643          }
2644          // Split every style fragment on ';', trim it, sort the declarations and join them back with ';'.
2645          if (!empty($style)) {
2646              $so = '';
2647              $tmps = array();
2648  
2649              foreach ($style as $s) {
2650                  $parts = explode(';', $s);
2651  
2652                  foreach ($parts as $p) {
2653                      if ($p = trim(trim($p), ":")) {
2654                          $tmps[] = $p;
2655                      }
2656                  }
2657              }
2658  
2659              sort($tmps);
2660  
2661              foreach ($tmps as $p) {
2662                  if ($p) {
2663                      $so .= $p.';';
2664                  }
2665              }
2666  
2667              $o['style'] = trim(str_replace(array("\n", ';;'), array('', ';'), $so));
2668          }
2669  
2670          if ($width) {
2671              $o['width'] = $this->cleanAttribs($width);
2672          }
2673          // Return the attributes ordered by name.
2674          ksort($o);
2675          return $o;
2676      }
2677  
2678      /**
2679       * Checks whether the text block should be wrapped in a paragraph.
2680       *
2681       * @param  string $text The input string
2682       * @return bool   TRUE if the text can be wrapped, FALSE otherwise
2683       */
2684  
2685      protected function hasRawText($text)
2686      {
2687          // Text containing a block-level tag, or made up only of divider tags, is not wrapped.
2688          if (preg_match($this->patterns['unwrappable'], $text)
2689              || preg_match($this->patterns['divider'], $text)
2690          ) {
2691              return false;
2692          }
2693  
2694          if (!preg_match($this->patterns['wrapped'], $text, $found)) {
2695              return true;
2696          }
2697  
2698          // Fully tag-wrapped text is wrappable only when the opening tag is phrasing content.
2699          return (bool) preg_match($this->patterns['phrasing'], $found['open']);
2700      }
2705  
2706      /**
2707       * Parses textile table structures into HTML.
2708       *
2709       * @param  string $text The textile input
2710       * @return string The parsed text
2711       */
2712  
2713      protected function tables($text)
2714      {
2715          // The pattern ends on a blank line, so one is appended to the input before matching.
2716          $pattern = "/^(?:table(?P<tatts>_?{$this->s}{$this->a}{$this->cls})\.".
2717              "(?P<summary>.*)?\n)?^(?P<rows>{$this->a}{$this->cls}\.? ?\|.*\|){$this->regex_snippets['space']}*\n\n/smU";
2718          $replaced = preg_replace_callback($pattern, array($this, 'fTable'), $text."\n\n");
2719  
2720          return (string) $replaced;
2721      }
2723  
2724      /**
2725       * Constructs a HTML table from a textile table structure.
2726       *
2727       * This method is used by Parser::tables() to process
2728       * found table structures.
2729       *
2730       * @param  array  $matches
2731       * @return string HTML table
2732       * @see    Parser::tables()
2733       */
2734  
2735      protected function fTable($matches)
2736      {
2737          $tatts = $this->parseAttribs($matches['tatts'], 'table');
2738          $space = $this->regex_snippets['space'];
2739  
2740          $cap = '';
2741          $colgrp = '';
2742          $last_rgrp = '';
2743          $c_row = 1;
2744          $sum = '';
2745          $rows = array();
2746  
2747          $summary = trim($matches['summary']);
2748  
2749          if ($summary !== '') {
2750              $sum = ' summary="'.htmlspecialchars($summary, ENT_QUOTES, 'UTF-8').'"';
2751          }
2752  
2753          foreach (preg_split("/\|{$space}*?$/m", $matches['rows'], -1, PREG_SPLIT_NO_EMPTY) as $row) {
2754              $row = ltrim($row);
2755  
2756              // Caption -- can only occur on row 1, otherwise treat '|=. foo |...'
2757              // as a normal center-aligned cell.
2758              if (($c_row <= 1) && preg_match(
2759                  "/^\|\=(?P<capts>$this->s$this->a$this->cls)\. (?P<cap>[^\n]*)(?P<row>.*)/s",
2760                  ltrim($row),
2761                  $cmtch
2762              )) {
2763                  $capts = $this->parseAttribs($cmtch['capts']);
2764                  $cap = "\t<caption".$capts.">".trim($cmtch['cap'])."</caption>\n";
2765                  $row = ltrim($cmtch['row']);
2766                  if (!$row) {
2767                      continue;
2768                  }
2769              }
2770  
2771              $c_row += 1;
2772  
2773              // Colgroup
2774              if (preg_match("/^\|:(?P<cols>$this->s$this->a$this->cls\. .*)/m", ltrim($row), $gmtch)) {
2775                  // Is this colgroup def missing a closing pipe? If so, there
2776                  // will be a newline in the middle of $row somewhere.
2777                  $nl = strpos($row, "\n");
2778                  $idx = 0;
2779  
2780                  foreach (explode('|', str_replace('.', '', $gmtch['cols'])) as $col) {
2781                      $gatts = $this->parseAttribs(trim($col), 'col');
2782                      $colgrp .= "\t<col".(($idx==0) ? "group".$gatts.">" : $gatts." />")."\n";
2783                      $idx++;
2784                  }
2785  
2786                  $colgrp .= "\t</colgroup>\n";
2787  
2788                  if ($nl === false) {
2789                      continue;
2790                  } else {
2791                      // Recover from our missing pipe and process the rest of the line.
2792                      $row = ltrim(substr($row, $nl));
2793                  }
2794              }
2795  
2796              // Row group
2797              $rgrpatts = $rgrp = '';
2798  
2799              if (preg_match(
2800                  "/(:?^\|(?P<part>$this->vlgn)(?P<rgrpatts>$this->s$this->a$this->cls)\.{$space}*$\n)?^(?P<row>.*)/sm",
2801                  ltrim($row),
2802                  $grpmatch
2803              )) {
2804                  if (isset($grpmatch['part'])) {
2805                      if ($grpmatch['part'] === '^') {
2806                          $rgrp = 'head';
2807                      } elseif ($grpmatch['part'] === '~') {
2808                          $rgrp = 'foot';
2809                      } elseif ($grpmatch['part'] === '-') {
2810                          $rgrp = 'body';
2811                      }
2812                  }
2813  
2814                  if (isset($grpmatch['part'])) {
2815                      $rgrpatts = $this->parseAttribs($grpmatch['rgrpatts']);
2816                  }
2817  
2818                  if (isset($grpmatch['row'])) {
2819                      $row = $grpmatch['row'];
2820                  }
2821              }
2822  
2823              if (preg_match("/^(?P<ratts>$this->a$this->cls\. )(?P<row>.*)/m", ltrim($row), $rmtch)) {
2824                  $ratts = $this->parseAttribs($rmtch['ratts'], 'tr');
2825                  $row = $rmtch['row'];
2826              } else {
2827                  $ratts = '';
2828              }
2829  
2830              $cells = array();
2831              $cellctr = 0;
2832  
2833              foreach (explode("|", $row) as $cell) {
2834                  $ctyp = "d";
2835  
2836                  if (preg_match("/^_(?=[{$this->regex_snippets['space']}[:punct:]])/", $cell)) {
2837                      $ctyp = "h";
2838                  }
2839  
2840                  if (preg_match("/^(?P<catts>_?$this->s$this->a$this->cls\. )(?P<cell>.*)/s", $cell, $cmtch)) {
2841                      $catts = $this->parseAttribs($cmtch['catts'], 'td');
2842                      $cell = $cmtch['cell'];
2843                  } else {
2844                      $catts = '';
2845                  }
2846  
2847                  if (!$this->isLiteModeEnabled()) {
2848                      $a = array();
2849  
2850                      if (preg_match('/(?<space>'.$this->regex_snippets['space'].'*)(?P<cell>.*)/s', $cell, $a)) {
2851                          $cell = $this->redclothLists($a['cell']);
2852                          $cell = $this->textileLists($cell);
2853                          $cell = $a['space'] . $cell;
2854                      }
2855                  }
2856  
2857                  if ($cellctr > 0) {
2858                      // Ignore first 'cell': it precedes the opening pipe
2859                      $cells[] = $this->doTagBr("t$ctyp", "\t\t\t<t$ctyp$catts>$cell</t$ctyp>");
2860                  }
2861  
2862                  $cellctr++;
2863              }
2864  
2865              $grp = '';
2866  
2867              if ($rgrp && $last_rgrp) {
2868                  $grp .= "\t</t".$last_rgrp.">\n";
2869              }
2870  
2871              if ($rgrp) {
2872                  $grp .= "\t<t".$rgrp.$rgrpatts.">\n";
2873              }
2874  
2875              $last_rgrp = ($rgrp) ? $rgrp : $last_rgrp;
2876              $rows[] = $grp."\t\t<tr$ratts>\n" . join("\n", $cells) . ($cells ? "\n" : "") . "\t\t</tr>";
2877              unset($cells, $catts);
2878          }
2879  
2880          $rows = join("\n", $rows) . "\n";
2881          $close = '';
2882  
2883          if ($last_rgrp) {
2884              $close = "\t</t".$last_rgrp.">\n";
2885          }
2886  
2887          return "<table{$tatts}{$sum}>\n".$cap.$colgrp.$rows.$close."</table>\n\n";
2888      }
2889  
2890      /**
2891       * Parses RedCloth-style definition lists into HTML.
2892       *
2893       * @param  string $text The textile input
2894       * @return string The parsed text
2895       */
2896  
2897      protected function redclothLists($text)
2898      {
2899          return (string)preg_replace_callback(
2900              "/^([-]+$this->cls[ .].*:=.*)$(?![^-])/smU",
2901              array($this, "fRedclothList"),
2902              $text
2903          );
2904      }
2905  
2906      /**
2907       * Constructs a HTML definition list from a RedCloth-style definition structure.
2908       *
2909       * This method is used by Parser::redclothLists() to process
2910       * found definition list structures.
2911       *
2912       * @param  array  $m
2913       * @return string HTML definition list
2914       * @see    Parser::redclothLists()
2915       */
2916  
2917      protected function fRedclothList($m)
2918      {
2919          $in = $m[0];
2920          $out = array();
2921          $text = preg_split('/\n(?=[-])/m', $in);
2922  
2923          foreach ($text as $line) {
2924              $m = array();
2925  
2926              if (preg_match("/^[-]+(?P<atts>$this->cls)\.? (?P<content>.*)$/s", $line, $m)) {
2927                  $content = trim($m['content']);
2928                  $atts = $this->parseAttribs($m['atts']);
2929  
2930                  if (!preg_match(
2931                      "/^(.*?){$this->regex_snippets['space']}*:=(.*?)".
2932                      "{$this->regex_snippets['space']}*(=:|:=)?".
2933                      "{$this->regex_snippets['space']}*$/s",
2934                      $content,
2935                      $xm
2936                  )) {
2937                      $xm = array( $content, $content, '' );
2938                  }
2939  
2940                  list(, $term, $def,) = $xm;
2941                  $term = trim($term);
2942                  $def = trim($def, ' ');
2943  
2944                  if (!$out) {
2945                      if ($def === '') {
2946                          $out[] = "<dl$atts>";
2947                      } else {
2948                          $out[] = '<dl>';
2949                      }
2950                  }
2951  
2952                  if ($term !== '') {
2953                      $pos = strpos($def, "\n");
2954                      $def = trim($def);
2955  
2956                      if ($this->isLineWrapEnabled()) {
2957                          $def = str_replace("\n", "<br />", $def);
2958                      }
2959  
2960                      if ($pos === 0) {
2961                          $def = '<p>' . $def . '</p>';
2962                      }
2963  
2964                      if ($this->isLineWrapEnabled()) {
2965                          $term = str_replace("\n", "<br />", $term);
2966                      }
2967  
2968                      $term = $this->graf($term);
2969                      $def = $this->graf($def);
2970  
2971                      $out[] = "\t<dt$atts>$term</dt>";
2972  
2973                      if ($def !== '') {
2974                          $out[] = "\t<dd>$def</dd>";
2975                      }
2976                  }
2977              }
2978          }
2979  
2980          $out[] = '</dl>';
2981          return implode("\n", $out);
2982      }
2983  
2984      /**
2985       * Parses Textile list structures into HTML.
2986       *
2987       * Searches for ordered, un-ordered and definition lists in the
2988       * textile input and generates HTML lists for them.
2989       *
2990       * @param  string $text The input
2991       * @return string The parsed text
2992       */
2993  
2994      protected function textileLists($text)
2995      {
2996          return (string)preg_replace_callback(
2997              "/^((?:[*;:]+|[*;:#]*#(?:_|\d+)?)$this->cls[ .].*)$(?![^#*;:])/smU",
2998              array($this, "fTextileList"),
2999              $text
3000          );
3001      }
3002  
3003      /**
3004       * Constructs a HTML list from a Textile list structure.
3005       *
3006       * This method is used by Parser::textileLists() to process
3007       * found list structures.
3008       *
3009       * @param  array  $m
3010       * @return string HTML list
3011       * @see    Parser::textileLists()
3012       */
3013  
3014      protected function fTextileList($m)
3015      {
3016          $text = $m[0];
3017          $lines = preg_split('/\n(?=[*#;:])/m', $m[0]);
3018          $list = array();
3019          $prev = false;
3020          $out = array();
3021          $lists = array();
3022          $litem = '';
3023  
3024          if ($lines === false) {
3025              return '';
3026          }
3027  
3028          foreach ($lines as $line) {
3029              $match = preg_match(
3030                  "/^(?P<tl>[#*;:]+)(?P<st>_|\d+)?(?P<atts>$this->cls)[ .](?P<content>.*)$/s",
3031                  $line,
3032                  $m
3033              );
3034  
3035              if ($match) {
3036                  $list[] = array_merge($m, array(
3037                      'level' => strlen($m['tl']),
3038                  ));
3039              } else {
3040                  $list[count($list) - 1]['content'] .= "\n" . $line;
3041              }
3042          }
3043  
3044          if (!$list || $list[0]['level'] > 1) {
3045              return $text;
3046          }
3047  
3048          foreach ($list as $index => $m) {
3049              $start = '';
3050              $content = trim($m['content']);
3051              $ltype = $this->liType($m['tl']);
3052  
3053              if (isset($list[$index + 1])) {
3054                  $next = $list[$index + 1];
3055              } else {
3056                  $next = false;
3057              }
3058  
3059              if (strpos($m['tl'], ';') !== false) {
3060                  $litem = 'dt';
3061              } elseif (strpos($m['tl'], ':') !== false) {
3062                  $litem = 'dd';
3063              } else {
3064                  $litem = 'li';
3065              }
3066  
3067              $showitem = ($content !== '');
3068  
3069              if ('o' === $ltype) {
3070                  if (!isset($this->olstarts[$m['tl']])) {
3071                      $this->olstarts[$m['tl']] = 1;
3072                  }
3073  
3074                  if (!$prev || $m['level'] > $prev['level']) {
3075                      if ($m['st'] === '') {
3076                          $this->olstarts[$m['tl']] = 1;
3077                      } elseif ($m['st'] !== '_') {
3078                          $this->olstarts[$m['tl']] = (int) $m['st'];
3079                      }
3080                  }
3081  
3082                  if ((!$prev || $m['level'] > $prev['level']) && $m['st'] !== '') {
3083                      $start = ' start="' . $this->olstarts[$m['tl']] . '"';
3084                  }
3085  
3086                  if ($showitem) {
3087                      $this->olstarts[$m['tl']] += 1;
3088                  }
3089              }
3090  
3091              if ($prev && $prev['tl'] && strpos($prev['tl'], ';') !== false && strpos($m['tl'], ':') !== false) {
3092                  $lists[$m['tl']] = 2;
3093              }
3094  
3095              $tabs = str_repeat("\t", $m['level'] - 1);
3096              $atts = $this->parseAttribs($m['atts']);
3097  
3098              if (!isset($lists[$m['tl']])) {
3099                  $lists[$m['tl']] = 1;
3100                  $line = $tabs.'<'.$ltype.'l'.$atts.$start.'>';
3101  
3102                  if ($showitem) {
3103                      $line .= "\n$tabs\t<$litem>$content";
3104                  }
3105              } elseif ($showitem) {
3106                  $line = "$tabs\t<$litem$atts>$content";
3107              } else {
3108                  $line = '';
3109              }
3110  
3111              if ((!$next || $next['level'] <= $m['level']) && $showitem) {
3112                  $line .= "</$litem>";
3113              }
3114  
3115              foreach (array_reverse($lists) as $k => $v) {
3116                  $indent = strlen($k);
3117  
3118                  if (!$next || $indent > $next['level']) {
3119                      if ($v !== 2) {
3120                          $line .= "\n$tabs</" . $this->liType($k) . "l>";
3121                      }
3122  
3123                      if ($v !== 2 && $indent > 1) {
3124                          $line .= "</".$litem.">";
3125                      }
3126  
3127                      unset($lists[$k]);
3128                  }
3129              }
3130  
3131              $prev = $m;
3132              $out[] = $line;
3133          }
3134  
3135          $out = implode("\n", $out);
3136          return $this->doTagBr($litem, $out);
3137      }
3138  
3139      /**
3140       * Determines the list type from the Textile input symbol.
3141       *
3142       * @param  string $in Textile input containing the possible list marker
3143       * @return string Either 'd', 'o', 'u'
3144       */
3145  
3146      protected function liType($in)
3147      {
3148          $m = array();
3149          $type = 'd';
3150          if (preg_match('/^(?P<type>[#*]+)/', $in, $m)) {
3151              $type = ('#' === substr($m['type'], -1)) ? 'o' : 'u';
3152          }
3153          return $type;
3154      }
3155  
3156      /**
3157       * Adds br tags within the specified container tag.
3158       *
3159       * @param  string $tag The tag
3160       * @param  string $in  The input
3161       * @return string
3162       */
3163  
3164      protected function doTagBr($tag, $in)
3165      {
3166          return (string)preg_replace_callback(
3167              '@<(?P<tag>'.preg_quote($tag).')(?P<atts>[^>]*?)>(?P<content>.*)(?P<closetag></\1>)@s',
3168              array($this, 'fBr'),
3169              $in
3170          );
3171      }
3172  
3173      /**
3174       * Adds br tags to paragraphs and headings.
3175       *
3176       * @param  string $in The input
3177       * @return string
3178       */
3179  
3180      protected function doPBr($in)
3181      {
3182          return (string)preg_replace_callback(
3183              '@<(?P<tag>p|h[1-6])(?P<atts>[^>]*?)>(?P<content>.*)(?P<closetag></\1>)@s',
3184              array($this, 'fPBr'),
3185              $in
3186          );
3187      }
3188  
3189      /**
3190       * Less restrictive version of fBr method.
3191       *
3192       * Used only in paragraphs and headings where the next row may
3193       * start with a smiley or perhaps something like '#8 bolt...'
3194       * or '*** stars...'.
3195       *
3196       * @param  array $m The input
3197       * @return string
3198       */
3199  
3200      protected function fPBr($m)
3201      {
3202          if ($this->isLineWrapEnabled()) {
3203              // Replaces <br/>\n instances that are not followed by white-space,
3204              // or at end, with single LF.
3205              $m['content'] = preg_replace(
3206                  "~<br[ ]*/?>{$this->regex_snippets['space']}*\n(?![{$this->regex_snippets['space']}|])~i",
3207                  "\n",
3208                  $m['content']
3209              );
3210          }
3211  
3212          // Replaces those LFs that aren't followed by white-space, or at end, with <br /> or a space.
3213          $m['content'] = preg_replace(
3214              "/\n(?![\s|])/",
3215              $this->isLineWrapEnabled() ? '<br />' : ' ',
3216              $m['content']
3217          );
3218  
3219          return '<'.$m['tag'].$m['atts'].'>'.$m['content'].$m['closetag'];
3220      }
3221  
3222      /**
3223       * Formats line breaks.
3224       *
3225       * @param  array  $m The input
3226       * @return string
3227       */
3228  
3229      protected function fBr($m)
3230      {
3231          $content = preg_replace(
3232              "@(.+)(?<!<br>|<br />|</li>|</dd>|</dt>)\n(?![\s|])@",
3233              $this->isLineWrapEnabled() ? '$1<br />' : '$1 ',
3234              $m['content']
3235          );
3236  
3237          return '<'.$m['tag'].$m['atts'].'>'.$content.$m['closetag'];
3238      }
3239  
3240      /**
3241       * Splits the given input into blocks.
3242       *
3243       * Blocks are separated by double line-break boundaries, and processed
3244       * the blocks one by one.
3245       *
3246       * @param  string $text Textile source text
3247       * @return string Input text with blocks processed
3248       */
3249  
3250      protected function blocks($text)
3251      {
3252          $regex = '/^(?P<tag>'.join('|', $this->blocktag_whitelist).')'.
3253              '(?P<atts>'.$this->a.$this->cls.$this->a.')\.(?P<ext>\.?)(?::(?P<cite>\S+))? (?P<graf>.*)$/Ss'.
3254              $this->regex_snippets['mod'];
3255  
3256          $textblocks = preg_split('/(\n{2,})/', $text, null, PREG_SPLIT_DELIM_CAPTURE);
3257  
3258          if ($textblocks === false) {
3259              return '';
3260          }
3261  
3262          $eatWhitespace = false;
3263          $whitespace = '';
3264          $ext = '';
3265          $out = array();
3266  
3267          foreach ($textblocks as $block) {
3268              // Line is just whitespace, keep it for the next block.
3269              if (trim($block) === '') {
3270                  if ($eatWhitespace === false) {
3271                      $whitespace .= $block;
3272                  }
3273                  continue;
3274              }
3275  
3276              if (!$ext) {
3277                  $tag = 'p';
3278                  $atts = '';
3279                  $cite = '';
3280                  $eat = false;
3281              }
3282  
3283              $eatWhitespace = false;
3284              $anonymous_block = !preg_match($regex, $block, $m);
3285  
3286              if (!$anonymous_block) {
3287                  // Last block was extended, so close it
3288                  if ($ext) {
3289                      $out[count($out)-1] .= $c1;
3290                  }
3291  
3292                  // Extract the new block's parts
3293                  extract($m);
3294                  list($o1, $o2, $content, $c2, $c1, $eat) = $this->fBlock($m);
3295  
3296                  // Leave off c1 if this block is extended, we'll close it at the start of the next block
3297                  $block = $o1.$o2.$content.$c2;
3298                  if (!$ext) {
3299                      $block .= $c1;
3300                  }
3301              } else {
3302                  $rawBlock = preg_match($this->patterns['divider'], $block) ||
3303                      ($this->isRawBlocksEnabled() && $this->isRawBlock($block));
3304  
3305                  if ($ext || (strpos($block, ' ') !== 0 && !$rawBlock)) {
3306                      list($o1, $o2, $content, $c2, $c1, $eat) = $this->fBlock(array(
3307                          0,
3308                          $tag,
3309                          $atts,
3310                          $ext,
3311                          $cite,
3312                          $block,
3313                      ));
3314  
3315                      // Skip $o1/$c1 because this is part of a continuing extended block
3316                      if ($tag == 'p' && !$this->hasRawText($content)) {
3317                          $block = $content;
3318                      } else {
3319                          $block = $o2.$content.$c2;
3320                      }
3321                  } elseif ($rawBlock && $this->isRestrictedModeEnabled()) {
3322                      $block = $this->shelve($this->rEncodeHTML($block));
3323                  } elseif ($rawBlock) {
3324                      $block = $this->shelve($block);
3325                  } else {
3326                      $block = $this->graf($block);
3327                  }
3328              }
3329  
3330              $block = $this->doPBr($block);
3331              $block = $whitespace. str_replace('<br>', '<br />', $block);
3332  
3333              if ($ext && $anonymous_block) {
3334                  $out[count($out)-1] .= $block;
3335              } elseif (!$eat) {
3336                  $out[] = $block;
3337              }
3338  
3339              if ($eat) {
3340                  $eatWhitespace = true;
3341              } else {
3342                  $whitespace = '';
3343              }
3344          }
3345  
3346          if ($ext) {
3347              $out[count($out)-1] .= $c1;
3348          }
3349  
3350          return join('', $out);
3351      }
3352  
3353      /**
3354       * Formats the given block.
3355       *
3356       * Adds block tags and formats the text content inside
3357       * the block.
3358       *
3359       * @param  array $m The block content to format
3360       * @return array
3361       */
3362  
3363      protected function fBlock($m)
3364      {
3365          list(, $tag, $att, $ext, $cite, $content) = $m;
3366          $atts = $this->parseAttribs($att);
3367          $space = $this->regex_snippets['space'];
3368  
3369          $o1 = '';
3370          $o2 = '';
3371          $c2 = '';
3372          $c1 = '';
3373          $eat = false;
3374  
3375          if ($tag === 'p') {
3376              // Is this an anonymous block with a note definition?
3377              $notedef = preg_replace_callback(
3378                  "/
3379                      ^note\#                              # start of note def marker
3380                      (?P<label>[^%<*!@#^([{ {$space}.]+)  # label
3381                      (?P<link>[*!^]?)                     # link
3382                      (?P<att>{$this->cls})                # att
3383                      \.?                                  # optional period.
3384                      {$space}+                            # whitespace ends def marker
3385                      (?P<content>.*)$                     # content
3386                  /x".$this->regex_snippets['mod'],
3387                  array($this, "fParseNoteDefs"),
3388                  $content
3389              );
3390  
3391              if ($notedef === '' || $notedef === null) {
3392                  // It will be empty if the regex matched and ate it.
3393                  return array($o1, $o2, $notedef, $c2, $c1, true);
3394              }
3395          }
3396  
3397          if (preg_match("/fn(?P<fnid>{$this->regex_snippets['digit']}+)/".$this->regex_snippets['mod'], $tag, $fns)) {
3398              $tag = 'p';
3399              $fnid = empty($this->fn[$fns['fnid']]) ? $this->linkPrefix . ($this->linkIndex++) : $this->fn[$fns['fnid']];
3400  
3401              // If there is an author-specified ID goes on the wrapper & the auto-id gets pushed to the <sup>
3402              $supp_id = '';
3403              if (strpos($atts, 'class=') === false) {
3404                  $atts .= ' class="footnote"';
3405              }
3406  
3407              if (strpos($atts, ' id=') === false) {
3408                  $atts .= ' id="fn' . $fnid . '"';
3409              } else {
3410                  $supp_id = ' id="fn' . $fnid . '"';
3411              }
3412  
3413              if (strpos($att, '^') === false) {
3414                  $sup = $this->formatFootnote($fns['fnid'], $supp_id);
3415              } else {
3416                  $sup = $this->formatFootnote('<a href="#fnrev' . $fnid . '">'.$fns['fnid'] .'</a>', $supp_id);
3417              }
3418  
3419              $content = $sup . ' ' . $content;
3420          }
3421  
3422          if ($tag == "bq") {
3423              $cite = $this->shelveURL($cite);
3424              $cite = ($cite != '') ? ' cite="' . $cite . '"' : '';
3425              $o1 = "<blockquote$cite$atts>\n";
3426              $o2 = "\t<p".$this->parseAttribs($att, '', false).">";
3427              $c2 = "</p>";
3428              $c1 = "\n</blockquote>";
3429          } elseif ($tag == 'bc') {
3430              $attrib_array = $this->parseAttribsToArray($att, 'code');
3431              $code_class   = '';
3432              if (isset($attrib_array['lang'])) {
3433                  $code_class = ' class="'.$attrib_array['lang'].'"';
3434                  unset($attrib_array['lang']);
3435                  $atts = $this->formatAttributeString($attrib_array);
3436              }
3437              $o1 = "<pre$atts><code$code_class>";
3438              $c1 = "</code></pre>";
3439              $content = $this->shelve($this->rEncodeHTML($content));
3440          } elseif ($tag == 'notextile') {
3441              $content = $this->shelve($content);
3442              $o1 = '';
3443              $o2 = '';
3444              $c1 = '';
3445              $c2 = '';
3446          } elseif ($tag == 'pre') {
3447              $content = $this->shelve($this->rEncodeHTML($content));
3448              $o1 = "<pre$atts>";
3449              $o2 = '';
3450              $c2 = '';
3451              $c1 = "</pre>";
3452          } elseif ($tag == '###') {
3453              $eat = true;
3454          } else {
3455              $o2 = "<$tag$atts>";
3456              $c2 = "</$tag>";
3457          }
3458  
3459          $content = (!$eat) ? $this->graf($content) : '';
3460  
3461          return array($o1, $o2, $content, $c2, $c1, $eat);
3462      }
3463  
3464      /**
3465       * Whether the block is a raw document node.
3466       *
3467       * Raw blocks will be shelved and left as is.
3468       *
3469       * @param  string $text Block to check
3470       * @return bool   TRUE if the block is raw, FALSE otherwise
3471       * @since  3.7.0
3472       */
3473  
3474      protected function isRawBlock($text)
3475      {
3476          if (preg_match($this->patterns['contained'], $text, $m)) {
3477              if (preg_match($this->patterns['phrasing'], $m['open'])) {
3478                  return false;
3479              }
3480  
3481              if (preg_match($this->patterns['block'], $m['open'])) {
3482                  return false;
3483              }
3484  
3485              return true;
3486          }
3487  
3488          return false;
3489      }
3490  
3491      /**
3492       * Formats a footnote.
3493       *
3494       * @param  string $marker The marker
3495       * @param  string $atts   Attributes
3496       * @param  bool   $anchor TRUE, if its a reference link
3497       * @return string Processed footnote
3498       */
3499  
3500      protected function formatFootnote($marker, $atts = '', $anchor = true)
3501      {
3502          $pattern = ($anchor) ? $this->symbols['fn_foot_pattern'] : $this->symbols['fn_ref_pattern'];
3503          return $this->replaceMarkers($pattern, array('atts' => $atts, 'marker' => $marker));
3504      }
3505  
3506      /**
3507       * Replaces markers with replacements in the given input.
3508       *
3509       * @param  string $text         The input
3510       * @param  array  $replacements Marker replacement pairs
3511       * @return string
3512       */
3513  
3514      protected function replaceMarkers($text, $replacements)
3515      {
3516          $map = array();
3517  
3518          foreach ($replacements as $from => $to) {
3519              $map['{'.$from.'}'] = $to;
3520          }
3521  
3522          return strtr($text, $map);
3523      }
3524  
3525      /**
3526       * Parses HTML comments in the given input.
3527       *
3528       * This method finds HTML comments in the given input
3529       * and replaces them with reference tokens.
3530       *
3531       * @param  string $text Textile input
3532       * @return string $text Processed input
3533       */
3534  
3535      protected function getHTMLComments($text)
3536      {
3537          return (string)preg_replace_callback(
3538              "/\<!--(?P<content>.*?)-->/sx",
3539              array($this, "fParseHTMLComments"),
3540              $text
3541          );
3542      }
3543  
3544      /**
3545       * Formats a HTML comment.
3546       *
3547       * Stores the comment on the shelf and returns
3548       * a reference token wrapped in to a HTML comment.
3549       *
3550       * @param  array  $m Options
3551       * @return string Reference token wrapped to a HTML comment tags
3552       */
3553  
3554      protected function fParseHTMLComments($m)
3555      {
3556          return '<!--'.$this->shelve($m['content']).'-->';
3557      }
3558  
3559      /**
3560       * Parses paragraphs in the given input.
3561       *
3562       * @param  string $text Textile input
3563       * @return string Processed input
3564       */
3565  
3566      protected function graf($text)
3567      {
3568          // Handle normal paragraph text
3569          if (!$this->isLiteModeEnabled()) {
3570              // Notextile blocks and inlines
3571              $text = $this->noTextile($text);
3572              // Handle code
3573              $text = $this->code($text);
3574          }
3575  
3576          // HTML comments --
3577          $text = $this->getHTMLComments($text);
3578          // Consume link aliases
3579          $text = $this->getRefs($text);
3580          // Treat quoted quote as a special glyph.
3581          $text = $this->glyphQuotedQuote($text);
3582          // Generate links
3583          $text = $this->links($text);
3584  
3585          // Handle images (if permitted)
3586          if ($this->isImageTagEnabled()) {
3587              $text = $this->images($text);
3588          }
3589  
3590          if (!$this->isLiteModeEnabled()) {
3591              // Handle tables
3592              $text = $this->tables($text);
3593              // Handle redcloth-style definition lists
3594              $text = $this->redclothLists($text);
3595              // Handle ordered & unordered lists plus txp-style definition lists
3596              $text = $this->textileLists($text);
3597          }
3598  
3599          // Inline markup (em, strong, sup, sub, del etc)
3600          $text = $this->spans($text);
3601  
3602          if (!$this->isLiteModeEnabled()) {
3603              // Turn footnote references into supers or links.
3604              // As footnote blocks are banned in lite mode there is no point
3605              // generating links for them.
3606              $text = $this->footnoteRefs($text);
3607  
3608              // Turn note references into links
3609              $text = $this->noteRefs($text);
3610          }
3611  
3612          // Glyph level substitutions (mainly typographic -- " & ' => curly quotes, -- => em-dash etc.
3613          $text = $this->glyphs($text);
3614  
3615          return rtrim($text, "\n");
3616      }
3617  
3618      /**
3619       * Replaces Textile span tags with their equivalent HTML inline tags.
3620       *
3621       * @param  string $text The Textile document to perform the replacements in
3622       * @return string The Textile document with spans replaced by their HTML inline equivalents
3623       */
3624  
3625      protected function spans($text)
3626      {
3627          $span_tags = array_keys($this->span_tags);
3628          $pnct = ".,\"'?!;:‹›«»„“”‚‘’";
3629          $this->span_depth++;
3630  
3631          if ($this->span_depth <= $this->max_span_depth) {
3632              foreach ($span_tags as $tag) {
3633                  $tag = preg_quote($tag);
3634                  $text = (string)preg_replace_callback(
3635                      "/
3636                      (?P<before>^|(?<=[\s>$pnct\(])|[{[])
3637                      (?P<tag>$tag)(?!$tag)
3638                      (?P<atts>{$this->cls})
3639                      (?!$tag)
3640                      (?::(?P<cite>\S+[^$tag]{$this->regex_snippets['space']}))?
3641                      (?P<content>[^{$this->regex_snippets['space']}$tag]+|\S.*?[^\s$tag\n])
3642                      (?P<end>[$pnct]*)
3643                      $tag
3644                      (?P<after>$|[\[\]}<]|(?=[$pnct]{1,2}[^0-9]|\s|\)))
3645                      /x".$this->regex_snippets['mod'],
3646                      array($this, "fSpan"),
3647                      $text
3648                  );
3649              }
3650          }
3651          $this->span_depth--;
3652          return $text;
3653      }
3654  
3655      /**
3656       * Formats a span tag and stores it on the shelf.
3657       *
3658       * @param  array  $m Options
3659       * @return string Content wrapped to reference tokens
3660       * @see    Parser::spans()
3661       */
3662  
3663      protected function fSpan($m)
3664      {
3665          $m = $this->getSpecialOptions($m);
3666          $tag = $this->span_tags[$m['tag']];
3667          $atts = $this->parseAttribsToArray($m['atts']);
3668  
3669          if ($m['cite'] != '') {
3670              $atts['cite'] = trim($m['cite']);
3671              ksort($atts);
3672          }
3673  
3674          $atts = $this->formatAttributeString($atts);
3675          $content = $this->spans($m['content']);
3676          $opentag = '<'.$tag.$atts.'>';
3677          $closetag = '</'.$tag.'>';
3678          $tags = $this->storeTags($opentag, $closetag);
3679          $out = "{$tags['open']}{$content}{$m['end']}{$tags['close']}";
3680  
3681          return $m['before'].$out.$m['after'];
3682      }
3683  
3684      /**
3685       * Stores a tag pair in the tag cache.
3686       *
3687       * @param  string $opentag  Opening tag
3688       * @param  string $closetag Closing tag
3689       * @return array  Reference tokens for both opening and closing tag
3690       */
3691  
3692      protected function storeTags($opentag, $closetag = '')
3693      {
3694          $tags = array();
3695  
3696          $this->refCache[$this->refIndex] = $opentag;
3697          $tags['open'] = $this->uid.$this->refIndex.':ospan ';
3698          $this->refIndex++;
3699  
3700          $this->refCache[$this->refIndex] = $closetag;
3701          $tags['close'] = ' '.$this->uid.$this->refIndex.':cspan';
3702          $this->refIndex++;
3703  
3704          return $tags;
3705      }
3706  
3707      /**
3708       * Replaces reference tokens with corresponding shelved span tags.
3709       *
3710       * This method puts all shelved span tags back to the final,
3711       * parsed input.
3712       *
3713       * @param  string $text The input
3714       * @return string Processed text
3715       * @see    Parser::storeTags()
3716       */
3717  
3718      protected function retrieveTags($text)
3719      {
3720          $text = (string)preg_replace_callback(
3721              '/'.$this->uid.'(?P<token>[0-9]+):ospan /',
3722              array($this, 'fRetrieveTags'),
3723              $text
3724          );
3725  
3726          $text = (string)preg_replace_callback(
3727              '/ '.$this->uid.'(?P<token>[0-9]+):cspan/',
3728              array($this, 'fRetrieveTags'),
3729              $text
3730          );
3731  
3732          return $text;
3733      }
3734  
3735      /**
3736       * Retrieves a tag from the tag cache.
3737       *
3738       * @param  array $m Options
3739       * @return string
3740       * @see    Parser::retrieveTags()
3741       */
3742  
3743      protected function fRetrieveTags($m)
3744      {
3745          return $this->refCache[$m['token']];
3746      }
3747  
3748      /**
3749       * Parses note lists in the given input.
3750       *
3751       * This method should be ran after other blocks
3752       * have been processed, but before reference tokens
3753       * have been replaced with their replacements.
3754       *
3755       * @param  string $text Textile input
3756       * @return string Processed input
3757       */
3758  
3759      protected function placeNoteLists($text)
3760      {
3761          // Sequence all referenced definitions.
3762          if ($this->notes) {
3763              $o = array();
3764              foreach ($this->notes as $label => $info) {
3765                  if (!empty($info['seq'])) {
3766                      $o[$info['seq']] = $info;
3767                      $info['seq'] = $label;
3768                  } else {
3769                      $this->unreferencedNotes[] = $info;    // Unreferenced definitions go here for possible future use.
3770                  }
3771              }
3772  
3773              if ($o) {
3774                  ksort($o);
3775              }
3776  
3777              $this->notes = $o;
3778          }
3779  
3780          // Replace list markers.
3781          $text = (string)preg_replace_callback(
3782              '@<p>notelist(?P<atts>'.$this->c.')'.
3783              '(?:\:(?P<startchar>['.$this->regex_snippets['wrd'].'|'.$this->syms.']))?'.
3784              '(?P<links>[\^!]?)(?P<extras>\+?)\.?'.$this->regex_snippets['space'].'*</p>@U'.
3785              $this->regex_snippets['mod'],
3786              array($this, "fNoteLists"),
3787              $text
3788          );
3789  
3790          return $text;
3791      }
3792  
3793      /**
3794       * Formats a note list.
3795       *
3796       * @param  array  $m Options
3797       * @return string Processed note list
3798       */
3799  
3800      protected function fNoteLists($m)
3801      {
3802          if (!$m['startchar']) {
3803              $m['startchar'] = 'a';
3804          }
3805  
3806          $index = $m['links'].$m['extras'].$m['startchar'];
3807  
3808          if (empty($this->notelist_cache[$index])) {
3809              // If not in cache, build the entry...
3810              $out = array();
3811  
3812              if ($this->notes) {
3813                  foreach ($this->notes as $seq => $info) {
3814                      $links = $this->makeBackrefLink($info, $m['links'], $m['startchar']);
3815  
3816                      if (!empty($info['def'])) {
3817                          $out[] = "\t".'<li'.$info['def']['atts'].'>'.$links.
3818                              '<span id="note'.$info['id'].'"> </span>'.$info['def']['content'].'</li>';
3819                      } else {
3820                          $out[] = "\t".'<li>'.$links.' Undefined Note [#'.$info['seq'].'].</li>';
3821                      }
3822                  }
3823              }
3824  
3825              if ('+' == $m['extras'] && $this->unreferencedNotes) {
3826                  foreach ($this->unreferencedNotes as $info) {
3827                      if (!empty($info['def'])) {
3828                          $out[] = "\t".'<li'.$info['def']['atts'].'>'.$info['def']['content'].'</li>';
3829                      }
3830                  }
3831              }
3832  
3833              $this->notelist_cache[$index] = join("\n", $out);
3834          }
3835  
3836          if ($this->notelist_cache[$index]) {
3837              $atts = $this->parseAttribs($m['atts']);
3838              return "<ol$atts>\n{$this->notelist_cache[$index]}\n</ol>";
3839          }
3840  
3841          return '';
3842      }
3843  
3844      /**
3845       * Renders a note back reference link.
3846       *
3847       * This method renders an array of back reference
3848       * links for notes.
3849       *
3850       * @param  array  $info    Options
3851       * @param  string $g_links Reference type
3852       * @param  string $i       Instance count
3853       * @return string Processed input
3854       */
3855  
3856      protected function makeBackrefLink($info, $g_links, $i)
3857      {
3858          $backlink_type = !empty($info['def']) && $info['def']['link'] ? $info['def']['link'] : $g_links;
3859          $allow_inc = (false === strpos($this->syms, $i));
3860  
3861          $i_ = str_replace(array('&', ';', '#'), '', $this->encodeHigh($i));
3862          $decode = (strlen($i) !== strlen($i_));
3863  
3864          if ($backlink_type === '!') {
3865              return '';
3866          } elseif ($backlink_type === '^') {
3867              return '<sup><a href="#noteref'.$info['refids'][0].'">'.$i.'</a></sup>';
3868          } else {
3869              $out = array();
3870  
3871              foreach ($info['refids'] as $id) {
3872                  $out[] = '<sup><a href="#noteref'.$id.'">'. (($decode) ? $this->decodeHigh($i_) : $i_) .'</a></sup>';
3873                  if ($allow_inc) {
3874                      $i_++;
3875                  }
3876              }
3877  
3878              return join(' ', $out);
3879          }
3880      }
3881  
3882      /**
3883       * Formats note definitions.
3884       *
3885       * This method formats notes and stores them in
3886       * note cache for later use and to build reference
3887       * links.
3888       *
3889       * @param  array  $m Options
3890       * @return string Empty string
3891       */
3892  
3893      protected function fParseNoteDefs($m)
3894      {
3895          $label = $m['label'];
3896          $link = $m['link'];
3897          $att = $m['att'];
3898          $content = $m['content'];
3899  
3900          // Assign an id if the note reference parse hasn't found the label yet.
3901          if (empty($this->notes[$label]['id'])) {
3902              $this->notes[$label]['id'] = $this->linkPrefix . ($this->linkIndex++);
3903          }
3904  
3905          // Ignores subsequent defs using the same label
3906          if (empty($this->notes[$label]['def'])) {
3907              $this->notes[$label]['def'] = array(
3908                  'atts'    => $this->parseAttribs($att),
3909                  'content' => $this->graf($content),
3910                  'link'    => $link,
3911              );
3912          }
3913          return '';
3914      }
3915  
3916      /**
3917       * Parses note references in the given input.
3918       *
3919       * This method replaces note reference tags with
3920       * links.
3921       *
3922       * @param  string $text Textile input
3923       * @return string
3924       */
3925  
3926      protected function noteRefs($text)
3927      {
3928          return (string)preg_replace_callback(
3929              "/\[(?P<atts>{$this->c})\#(?P<label>[^\]!]+?)(?P<nolink>[!]?)\]/Ux",
3930              array($this, "fParseNoteRefs"),
3931              $text
3932          );
3933      }
3934  
3935      /**
3936       * Formats note reference links.
3937       *
3938       * By the time this function is called, all note lists will have been
3939       * processed into the notes array, and we can resolve the link numbers in
3940       * the order we process the references.
3941       *
3942       * @param  array  $m Options
3943       * @return string Note reference
3944       */
3945  
3946      protected function fParseNoteRefs($m)
3947      {
3948          $atts = $this->parseAttribs($m['atts']);
3949          $nolink = ($m['nolink'] === '!');
3950  
3951          // Assign a sequence number to this reference if there isn't one already.
3952  
3953          if (empty($this->notes[$m['label']]['seq'])) {
3954              $num = $this->notes[$m['label']]['seq'] = ($this->note_index++);
3955          } else {
3956              $num = $this->notes[$m['label']]['seq'];
3957          }
3958  
3959          // Make our anchor point & stash it for possible use in backlinks when the
3960          // note list is generated later.
3961          $refid = $this->linkPrefix . ($this->linkIndex++);
3962          $this->notes[$m['label']]['refids'][] = $refid;
3963  
3964          // If we are referencing a note that hasn't had the definition parsed yet, then assign it an ID.
3965  
3966          if (empty($this->notes[$m['label']]['id'])) {
3967              $id = $this->notes[$m['label']]['id'] = $this->linkPrefix . ($this->linkIndex++);
3968          } else {
3969              $id = $this->notes[$m['label']]['id'];
3970          }
3971  
3972          // Build the link (if any).
3973          $out = '<span id="noteref'.$refid.'">'.$num.'</span>';
3974  
3975          if (!$nolink) {
3976              $out = '<a href="#note'.$id.'">'.$out.'</a>';
3977          }
3978  
3979          // Build the reference.
3980          return $this->replaceMarkers($this->symbols['nl_ref_pattern'], array(
3981              'atts' => $atts,
3982              'marker' => $out,
3983          ));
3984      }
3985  
3986      /**
3987       * Parses URI into component parts.
3988       *
3989       * This method splits a URI-like string apart into component parts, while
3990       * also providing validation.
3991       *
3992       * @param  string $uri The string to pick apart (if possible)
3993       * @param  array  $m   Reference to an array the URI component parts are assigned to
3994       * @return bool   TRUE if the string validates as a URI
3995       * @link   http://tools.ietf.org/html/rfc3986#appendix-B
3996       */
3997  
3998      protected function parseURI($uri, &$m)
3999      {
4000          $r = "@^((?P<scheme>[^:/?#]+):)?".
4001              "(//(?P<authority>[^/?#]*))?".
4002              "(?P<path>[^?#]*)".
4003              "(\?(?P<query>[^#]*))?".
4004              "(#(?P<fragment>.*))?@";
4005  
4006          return preg_match($r, $uri, $m) === 1;
4007      }
4008  
4009      /**
4010       * Checks whether a component part can be added to a URI.
4011       *
4012       * @param  array  $mask  An array of allowed component parts
4013       * @param  string $name  The component to add
4014       * @param  array  $parts An array of existing components to modify
4015       * @return bool   TRUE if the component can be added
4016       */
4017  
4018      protected function addPart($mask, $name, $parts)
4019      {
4020          return (in_array($name, $mask) && isset($parts[$name]) && '' !== $parts[$name]);
4021      }
4022  
4023      /**
4024       * Rebuild a URI from parsed parts and a mask.
4025       *
4026       * @param  array  $parts  Full array of URI parts
4027       * @param  string $mask   Comma separated list of URI parts to include in the rebuilt URI
4028       * @param  bool   $encode Flag to control encoding of the path part of the rebuilt URI
4029       * @return string         The rebuilt URI
4030       * @link   http://tools.ietf.org/html/rfc3986#section-5.3
4031       */
4032  
4033      protected function rebuildURI($parts, $mask = 'scheme,authority,path,query,fragment', $encode = true)
4034      {
4035          $mask = explode(',', $mask);
4036          $out = '';
4037  
4038          if ($this->addPart($mask, 'scheme', $parts)) {
4039              $out .= $parts['scheme'] . ':';
4040          }
4041  
4042          if ($this->addPart($mask, 'authority', $parts)) {
4043              $out .= '//' . $parts['authority'];
4044          }
4045  
4046          if ($this->addPart($mask, 'path', $parts)) {
4047              if (!$encode) {
4048                  $out .= $parts['path'];
4049              } else {
4050                  $pp = explode('/', $parts['path']);
4051                  foreach ($pp as &$p) {
4052                      $p = str_replace(array('%25', '%40'), array('%', '@'), rawurlencode($p));
4053                      if (!in_array($parts['scheme'], array('mailto'))) {
4054                          $p = str_replace('%2B', '+', $p);
4055                      }
4056                  }
4057  
4058                  $pp = implode('/', $pp);
4059                  $out .= $pp;
4060              }
4061          }
4062  
4063          if ($this->addPart($mask, 'query', $parts)) {
4064              $out .= '?' . $parts['query'];
4065          }
4066  
4067          if ($this->addPart($mask, 'fragment', $parts)) {
4068              $out .= '#' . $parts['fragment'];
4069          }
4070  
4071          return $out;
4072      }
4073  
4074      /**
4075       * Parses and shelves links in the given input.
4076       *
4077       * This method parses the input Textile document for links.
4078       * Formats and encodes them, and stores the created link
4079       * elements in cache.
4080       *
4081       * @param  string $text Textile input
4082       * @return string The input document with link pulled out and replaced with tokens
4083       */
4084  
4085      protected function links($text)
4086      {
4087          $text = $this->markStartOfLinks($text);
4088          return $this->replaceLinks($text);
4089      }
4090  
    /**
     * Finds and marks the start of well formed links in the input text.
     *
     * The text is split on every '":' that is followed by a match for the
     * 'char' regex snippet. For each slice except the last, the '"'
     * characters in the slice are walked from the end backwards while a
     * running balance is kept, to decide which quote opens the link text.
     * That quote is prefixed with the parser's uid and 'linkStartMarker:'.
     * Slices that contain no '"' are left unmarked.
     *
     * @param  string $text String to search for link starting positions
     * @return string Text with links marked, or an empty string if the split fails
     * @see    Parser::links()
     */

    protected function markStartOfLinks($text)
    {
        // Slice text on '":<not space>' boundaries. These always occur in inline
        // links between the link text and the url part and are much more
        // infrequent than '"' characters so we have less possible links
        // to process.
        $mod = $this->regex_snippets['mod'];
        $slices = preg_split('/":(?='.$this->regex_snippets['char'].')/'.$mod, $text);

        // preg_split() signals failure with false.
        if ($slices === false) {
            return '';
        }

        if (count($slices) > 1) {
            // There are never any start of links in the last slice, so pop it
            // off (we'll glue it back later).
            $last_slice = array_pop($slices);

            foreach ($slices as &$slice) {
                // If there is no possible start quote then this slice is not a link
                if (strpos($slice, '"') === false) {
                    continue;
                }

                // Cut this slice into possible starting points wherever we
                // find a '"' character. Any of these parts could represent
                // the start of the link text - we have to find which one.
                $possible_start_quotes = explode('"', $slice);

                // Start our search for the start of the link with the closest prior
                // quote mark.
                $possibility = rtrim(array_pop($possible_start_quotes));

                // Init the balanced count. If this is zero or less after a
                // pass of the loop below, we mark the " that caused it to
                // balance as the start of the link and move on to the next slice.
                $balanced = 0;
                $linkparts = array();
                $iter = 0;

                // array_pop() returns null once the fragments run out.
                while ($possibility !== null) {
                    // Starting at the end, pop off the previous part of the
                    // slice's fragments.

                    // Add this part to those parts that make up the link text.
                    $linkparts[] = $possibility;

                    if ($possibility !== '') {
                        // did this part inc or dec the balanced count?
                        if (preg_match('/^\S|=$/'.$mod, $possibility)) {
                            $balanced--;
                        }

                        if (preg_match('/\S$/'.$mod, $possibility)) {
                            $balanced++;
                        }

                        $possibility = array_pop($possible_start_quotes);
                    } else {
                        // If quotes occur next to each other, we get zero length strings.
                        // eg. ...""Open the door, HAL!"":url...
                        // In this case we count a zero length in the last position as a
                        // closing quote and others as opening quotes.
                        $balanced = (!$iter++) ? $balanced+1 : $balanced-1;

                        $possibility = array_pop($possible_start_quotes);

                        // If out of possible starting segments we back the last one
                        // from the linkparts array
                        if ($possibility === null) {
                            array_pop($linkparts);
                            break;
                        }

                        // If the next possibility is empty or ends in a space we have a
                        // closing ".
                        if ($possibility === '' ||
                            preg_match("~{$this->regex_snippets['space']}$~".$mod, $possibility)) {
                            $balanced = 0; // force search exit
                        }
                    }

                    if ($balanced <= 0) {
                        // The fragment just popped belongs to the text before
                        // the link, so put it back.
                        array_push($possible_start_quotes, $possibility);
                        break;
                    }
                }

                // Rebuild the link's text by reversing the parts and sticking them back
                // together with quotes.
                $link_content = implode('"', array_reverse($linkparts));

                // Rebuild the remaining stuff that goes before the link but that's
                // already in order.
                $pre_link = implode('"', $possible_start_quotes);

                // Re-assemble the link starts with a specific marker for the next regex.
                $slice = $pre_link . $this->uid.'linkStartMarker:"' . $link_content;
            }

            // Add the last part back
            $slices[] = $last_slice;
        }

        // Re-assemble the full text, restoring the '":' separators that the
        // split removed.
        $text = implode('":', $slices);

        return $text;
    }
4208  
4209      /**
4210       * Replaces links with tokens and stores them on the shelf.
4211       *
4212       * @param  string $text The input
4213       * @return string Processed input
4214       * @see    Parser::links()
4215       */
4216  
4217      protected function replaceLinks($text)
4218      {
4219          $stopchars = "\s|^'\"*";
4220          $needle = $this->uid . 'linkStartMarker:';
4221          $prev = null;
4222  
4223          while (\strpos($text, $needle) !== false) {
4224              $text = (string)preg_replace_callback(
4225                  '/
4226                  (?P<pre>\[)?
4227                  ' . $needle . '"
4228                  (?P<inner>(?:.|\n)*?)
4229                  ":(?P<urlx>[^' . $stopchars . ']*)
4230                  /x' . $this->regex_snippets['mod'],
4231                  array($this, "fLink"),
4232                  $text
4233              );
4234  
4235              if ($prev === $text) {
4236                  break;
4237              }
4238  
4239              $prev = $text;
4240          }
4241  
4242          return $text;
4243      }
4244  
    /**
     * Formats a link and stores it on the shelf.
     *
     * The link text is split into attributes, text and title. The URL is
     * trimmed from its end: trailing punctuation, a trailing closing tag and
     * unbalanced brackets are moved out of the URL, and everything but
     * unbalanced ']' characters is re-emitted after the link. The remaining
     * URL is validated and the anchor element is built and shelved.
     *
     * Two cases return plain text instead of a token: an empty link text
     * returns the quoted text and URL as they were matched (newlines in the
     * text already replaced), and a URL that fails validation returns the
     * whole match with only the link start marker removed.
     *
     * @param  array  $m Match data with 'pre', 'inner' and 'urlx'
     * @return string Reference token for the shelved link plus any surrounding text, or the unlinked text
     * @see    Parser::replaceLinks()
     */

    protected function fLink($m)
    {
        $in = $m[0];
        $pre = $m['pre'];
        // Newlines inside the link text become line breaks or spaces,
        // depending on whether line wrapping is enabled.
        if ($this->isLineWrapEnabled()) {
            $inner = str_replace("\n", '<br />', $m['inner']);
        } else {
            $inner = str_replace("\n", ' ', $m['inner']);
        }
        $url = $m['urlx'];
        $m = array();

        // Treat empty inner part as an invalid link.
        if (trim($inner) === '') {
            return $pre.'"'.$inner.'":'.$url;
        }

        // Split inner into $atts, $text and $title.
        preg_match(
            '/
            ^
            (?P<atts>' . $this->cls . ')            # $atts (if any)
            ' . $this->regex_snippets['space'] . '* # any optional spaces
            (?P<text>                               # $text is...
                (!.+!)                              #     an image
            |                                       #   else...
                .+?                                 #     link text
            )                                       # end of $text
            (?:\((?P<title>[^)]+?)\))?              # $title (if any)
            $
            /x'.$this->regex_snippets['mod'],
            $inner,
            $m
        );
        // If the pattern did not match, the whole inner part is the link text.
        $atts = isset($m['atts']) ? $m['atts'] : '';
        $text = isset($m['text']) ? trim($m['text']) : $inner;
        $title = isset($m['title']) ? $m['title'] : '';
        $m = array();

        // $pop and $tight collect text that is cut off the end of the URL and
        // re-emitted after the link. Bracket counts left at null are filled
        // in only when the trimming loop below first needs them.
        $pop = $tight = '';
        $counts = array(
            '[' => null,
            ']' => substr_count($url, ']'), # We need to know how many closing square brackets we have
            '(' => null,
            ')' => null,
        );

        // Look for footnotes or other square-bracket delimited stuff at the end of the url...
        // eg. "text":url][otherstuff... will have "[otherstuff" popped back out.
        //     "text":url?q[]=x][123]    will have "[123]" popped off the back, the remaining closing square brackets
        //                               will later be tested for balance
        if ($counts[']']) {
            if (1 === preg_match('@(?P<url>^.*\])(?P<tight>\[.*?)$@' . $this->regex_snippets['mod'], $url, $m)) {
                $url = $m['url'];
                $tight = $m['tight'];
                $m = array();
            }
        }

        // Split off any trailing text that isn't part of an array assignment.
        // eg. "text":...?q[]=value1&q[]=value2 ... is ok
        // "text":...?q[]=value1]following  ... would have "following"
        // popped back out and the remaining square bracket
        // will later be tested for balance
        if ($counts[']']) {
            if (1 === preg_match('@(?P<url>^.*\])(?!=)(?P<end>.*?)$@' . $this->regex_snippets['mod'], $url, $m)) {
                $url = $m['url'];
                $tight = $m['end'] . $tight;
                $m = array();
            }
        }

        // Split the URL into an array of single bytes (str_split() is not
        // multi-byte aware). That is sufficient here because only ASCII
        // characters are compared against while trimming the end of the URL.
        $url_chars = str_split($url);

        // Walk the URL backwards and pop off any characters that don't belong
        // at its end (like . or , or unmatched brackets of various kinds).
        // The loop stops at the first character that is kept.
        $first = true;
        do {
            $c = array_pop($url_chars);
            $popped = false;
            switch ($c) {
                // Textile URL shouldn't end in these characters, we pop
                // them off the end and push them out the back of the url again.
                case '!':
                case '?':
                case ':':
                case ';':
                case '.':
                case ',':
                    $pop = $c . $pop;
                    $popped = true;
                    break;

                case '>':
                    // A closing tag such as "</em>" at the end of the URL is
                    // moved out of the URL as a whole; any other '>' falls
                    // through without being put back or added to $pop.
                    $urlLeft = implode('', $url_chars);

                    if (preg_match('@(?P<tag><\/[a-z]+)$@', $urlLeft, $m)) {
                        $url_chars = str_split(substr($urlLeft, 0, strlen($m['tag']) * -1));
                        $pop = $m['tag'] . $c . $pop;
                        $popped = true;
                    }

                    break;

                case ']':
                    // If we find a closing square bracket we are going to see if it is balanced.
                    // If it is balanced with matching opening bracket then it is part of the URL
                    // else we spit it back out of the URL.
                    if (null === $counts['[']) {
                        $counts['['] = substr_count($url, '[');
                    }

                    if ($counts['['] === $counts[']']) {
                        // It is balanced, so keep it
                        $url_chars[] = $c;
                    } else {
                        // In the case of un-matched closing square brackets we just eat it
                        $popped = true;
                        $counts[']'] -= 1;
                        // When the eaten bracket was the very last character of
                        // the URL, the '[' captured before the link is dropped too.
                        if ($first) {
                            $pre = '';
                        }
                    }
                    break;

                case ')':
                    if (null === $counts[')']) {
                        $counts['('] = substr_count($url, '(');
                        $counts[')'] = substr_count($url, ')');
                    }

                    if ($counts['('] === $counts[')']) {
                        // It is balanced, so keep it
                        $url_chars[] = $c;
                    } else {
                        // Unbalanced so spit it out the back end
                        $pop = $c . $pop;
                        $counts[')'] -= 1;
                        $popped = true;
                    }
                    break;

                default:
                    // We have an acceptable character for the end of the url so put it back and
                    // exit the character popping loop
                    $url_chars[] = $c;
                    break;
            }
            $first = false;
        } while ($popped);

        $url = implode('', $url_chars);
        $uri_parts = array();
        $this->parseURI($url, $uri_parts);

        // An invalid URL leaves the text as it was, minus the start marker.
        if (!$this->isValidUrl($url)) {
            return str_replace($this->uid.'linkStartMarker:', '', $in);
        }

        $scheme = $uri_parts['scheme'];
        $scheme_in_list = in_array($scheme, $this->url_schemes);

        // A link text of '$' means the link text is derived from the URL.
        if ('$' === $text) {
            if ($scheme_in_list) {
                $text = ltrim($this->rebuildURI($uri_parts, 'authority,path,query,fragment', false), '/');
            } else {
                // The URL may be an alias; if so, show the URL it stands for.
                if (isset($this->urlrefs[$url])) {
                    $url = urldecode($this->urlrefs[$url]);
                }

                $text = $url;
            }
        }

        $text = trim($text);
        $title = $this->encodeHTML($title);

        if ($this->isImageTagEnabled()) {
            $text = $this->images($text);
        }

        $text = $this->spans($text);
        $text = $this->glyphs($text);
        $url = $this->shelveURL($this->rebuildURI($uri_parts));
        $a = $this->newTag(
            'a',
            $this->parseAttribsToArray($atts),
            false
        )->title($title)->href($url, true)->rel($this->rel);
        $tags = $this->storeTags((string) $a, '</a>');
        $out = $this->shelve($tags['open'].trim($text).$tags['close']);

        // Text that was trimmed off the URL goes back after the link.
        return $pre . $out . $pop . $tight;
    }
4451  
/**
 * Collects URI aliases from the given input.
 *
 * An alias is a line of the form "[alias]URL", where the URL starts
 * with one of $this->url_schemes or with a slash. Every such line is
 * handed to refs(), which stores the alias, and is replaced with the
 * value refs() returns.
 *
 * This operation happens before the actual link parsing takes place,
 * so that an alias can be used by any link in the document.
 *
 * @param  string $text Textile input
 * @return string The Textile document with any URI aliases removed
 * @see    Parser::refs()
 */

protected function getRefs($text)
{
    $schemes = array();

    foreach ($this->url_schemes as $name) {
        $schemes[] = preg_quote($name.':', '/');
    }

    $alternatives = implode('|', $schemes);
    $space = $this->regex_snippets['space'];

    $regex = '/^\[(?P<alias>.+)\]'
        .'(?P<url>(?:'.$alternatives.'|\/)\S+)'
        .'(?='.$space.'|$)/Um'
        .$this->regex_snippets['mod'];

    $stripped = preg_replace_callback($regex, array($this, 'refs'), $text);

    return (string) $stripped;
}
4484  
/**
 * Parses, encodes and stores the current URI alias.
 *
 * The URL is split with parseURI(), put back together with
 * rebuildURI() and saved in $this->urlrefs under the alias name.
 *
 * @param  array  $m Named regular expression parts, 'alias' and 'url'
 * @return string Empty string, so that the alias line is dropped from the input
 * @see    Parser::getRefs()
 */

protected function refs($m)
{
    $parts = array();
    $this->parseURI($m['url'], $parts);

    // Encodes URL if needed.
    $rebuilt = $this->rebuildURI($parts);
    $this->urlrefs[$m['alias']] = ltrim($rebuilt);

    return '';
}
4501  
/**
 * Shelves a parsed URL.
 *
 * Stores away a URL that has been parsed and requires no more
 * processing. The URL is kept in $this->refCache and a token made of
 * $this->uid, the running reference index and the type is handed back.
 *
 * @param  string      $text The URL
 * @param  string|null $type The type, 'url' is used when NULL
 * @return string The URL's unique reference ID, or an empty string if the URL is empty
 * @see    Parser::retrieveURLs()
 */

protected function shelveURL($text, $type = null)
{
    if ($text === '') {
        return '';
    }

    $index = $this->refIndex++;
    $this->refCache[$index] = $text;

    return $this->uid.$index.':'.($type === null ? 'url' : $type);
}
4527  
/**
 * Replaces reference tokens with the corresponding shelved URLs.
 *
 * Every token of type 'url' or 'image' found in the input is passed
 * to retrieveURL() and swapped for its result.
 *
 * @param  string $text The input
 * @return string Processed text
 * @see    Parser::shelveURL()
 * @see    Parser::retrieveURL()
 */

protected function retrieveURLs($text)
{
    $regex = '/'.$this->uid.'(?P<token>[0-9]+):(?P<type>url|image)/';
    $restored = preg_replace_callback($regex, array($this, 'retrieveURL'), $text);

    return (string) $restored;
}
4547  
/**
 * Retrieves a URL from the shelf.
 *
 * If the shelved value is the name of a URI alias, the aliased URL is
 * used instead. The result is run through relURL() and rEncodeHTML().
 *
 * @param  array  $m Named regular expression parts, 'token' and 'type'
 * @return string The URL, or an empty string for an unknown token
 */

protected function retrieveURL($m)
{
    $token = $m['token'];

    if (isset($this->refCache[$token])) {
        $stored = $this->refCache[$token];
        $resolved = isset($this->urlrefs[$stored]) ? $this->urlrefs[$stored] : $stored;

        return $this->rEncodeHTML($this->relURL($resolved, $m['type']));
    }

    return '';
}
4569  
/**
 * Whether the URL is valid.
 *
 * A URL is accepted when parseURI() succeeds and the URL either has
 * no scheme at all, or its scheme is listed in $this->url_schemes.
 *
 * @param  string $url The URL to check
 * @return bool   TRUE if valid, FALSE otherwise
 * @since  3.6.0
 */

protected function isValidUrl($url)
{
    if (!$this->parseURI($url, $parts)) {
        return false;
    }

    $hasScheme = isset($parts['scheme']) && $parts['scheme'] !== '';

    return !$hasScheme || in_array($parts['scheme'], $this->url_schemes, true);
}
4596  
/**
 * Completes and formats a relative URL.
 *
 * The prefix is $this->relativeImagePrefix when that legacy option is
 * set. Otherwise it is $this->relImagePrefix for images, or when no
 * type is given, and $this->relLinkPrefix for any other type.
 *
 * The URL is kept as is if no prefix is configured, if it starts with
 * '/', './', '../' or '#', or if it starts with one of
 * $this->url_schemes. Otherwise the URL is prefixed.
 *
 * @param  string      $url  The URL
 * @param  string|null $type The type
 * @return string The URL, prefixed where applicable
 */

protected function relURL($url, $type = null)
{
    if ($this->relativeImagePrefix !== null) {
        // Use legacy fallback if set. Deprecated in 3.7.0.
        $prefix = $this->relativeImagePrefix;
    } elseif ($type === null || $type === 'image') {
        $prefix = $this->relImagePrefix;
    } else {
        $prefix = $this->relLinkPrefix;
    }

    if (!$prefix) {
        return $url;
    }

    foreach (array('/', './', '../', '#') as $lead) {
        if (strpos($url, $lead) === 0) {
            return $url;
        }
    }

    foreach ($this->url_schemes as $scheme) {
        if (strpos($url, $scheme.':') === 0) {
            return $url;
        }
    }

    return $prefix.$url;
}
4641  
/**
 * Checks if a URL is relative.
 *
 * The given URL is considered relative unless it starts with '//',
 * or with one of $this->url_schemes followed by '://'.
 *
 * @param  string $url The URL
 * @return bool   TRUE if relative, FALSE otherwise
 */

protected function isRelURL($url)
{
    $absoluteLeads = array('//');

    foreach ($this->url_schemes as $scheme) {
        $absoluteLeads[] = $scheme.'://';
    }

    foreach ($absoluteLeads as $lead) {
        if (strpos($url, $lead) === 0) {
            return false;
        }
    }

    return true;
}
4667  
/**
 * Parses and shelves images in the given input.
 *
 * Looks for Textile images, that is "!source!" with optional
 * alignment, attributes, a title in parentheses and a ":href" link
 * target, and passes each match to fImage(). The match is replaced
 * with whatever fImage() returns.
 *
 * @param  string $text Textile input
 * @return string The input document with images pulled out and replaced with tokens
 * @see    Parser::fImage()
 */

protected function images($text)
{
    $pattern = '/
            (?:[[{])?                       # pre
            \!                              # opening !
            (?P<align>\<|\=|\>|&lt;|&gt;)?  # optional alignment
            (?P<atts>'.$this->cls.')        # optional attributes
            (?:\.\s)?                       # optional dot-space
            (?P<url>[^\s(!]+)               # presume this is the src
            \s?                             # optional space
            (?:\((?P<title>[^\)]+)\))?      # optional title
            \!                              # closing
            (?::(?P<href>\S+)(?<![\]).,]))? # optional href sans final punct
            (?:[\]}]|(?=[.,\s)|]|$))        # lookahead: space,.)| or end of string ("|" needed if image in table cell)
            /x'.$this->regex_snippets['mod'];

    $parsed = preg_replace_callback($pattern, array($this, 'fImage'), $text);

    return (string) $parsed;
}
4700  
/**
 * Checks that the given path is under the document root.
 *
 * The path is resolved with realpath() first, so a path that does
 * not exist is never accepted. Backslashes are turned into forward
 * slashes on both sides before the resolved path is tested for
 * starting with the document root directory.
 *
 * @param  string $path Path to check
 * @return bool   TRUE if path is within the image document root
 * @see    Parser::images()
 * @since  3.6.0
 */

protected function isInDocumentRootDirectory($path)
{
    $resolved = realpath($path);

    if (!$resolved) {
        return false;
    }

    $root = strtr($this->getDocumentRootDirectory(), '\\', '/');
    $resolved = strtr($resolved, '\\', '/');

    return strpos($resolved, $root) === 0;
}
4722  
/**
 * Formats an image and stores it on the shelf.
 *
 * The match is handed back untouched when the image source fails
 * isValidUrl(). The same goes for the optional link target, unless
 * it is the name of a known URI alias: without that check an href
 * using a scheme outside $this->url_schemes would be written into
 * the anchor as is.
 *
 * For relative sources, and unless dimensionless images are enabled,
 * the file is looked up under the document root directory and its
 * size is added as height and width attributes.
 *
 * @param  array  $m Named regular expression parts from images()
 * @return string Reference token for the shelved content, or the
 *                original match if one of the URLs was rejected
 * @see    Parser::images()
 */

protected function fImage($m)
{
    if (!$this->isValidUrl($m['url'])) {
        return $m[0];
    }

    $href = (isset($m['href'])) ? $m['href'] : '';

    // The link target is user input too. Alias names are let through,
    // as retrieveURL() swaps them for the URL stored by refs().
    if ($href && !isset($this->urlrefs[$href]) && !$this->isValidUrl($href)) {
        return $m[0];
    }

    $extras = '';
    $align = (isset($m['align'])) ? $m['align'] : '';
    $atts = $m['atts'];
    $url = $m['url'];
    $title = (isset($m['title'])) ? $m['title'] : '';

    $alignments = array(
        '<'    => 'left',
        '='    => 'center',
        '>'    => 'right',
        '&lt;' => 'left',
        '&gt;' => 'right',
    );

    if (isset($alignments[$align])) {
        if ($this->getDocumentType() === 'html5') {
            // No align attribute here, the alignment is handed to
            // parseAttribsToArray() as an extra instead.
            $extras = 'align-'.$alignments[$align];
            $align = '';
        } else {
            $align = $alignments[$align];
        }
    } else {
        $align = '';
    }

    if ($title) {
        $title = $this->encodeHTML($title);
    }

    $img = $this->newTag('img', $this->parseAttribsToArray($atts, '', true, $extras))
        ->align($align)
        ->alt($title, true)
        ->src($this->shelveURL($url, 'image'), true)
        ->title($title);

    if (!$this->dimensionless_images && $this->isRelURL($url)) {
        $location = $this->getDocumentRootDirectory().ltrim($url, '\\/');
        $location_ok = $this->isInDocumentRootDirectory($location);
        if ($location_ok) {
            $real_location = realpath($location);
            if ($real_location && ($size = getimagesize($real_location))) {
                $img->height($size[1])->width($size[0]);
            }
        }
    }

    $out = (string) $img;

    if ($href) {
        $href = $this->shelveURL($href);
        $link = $this->newTag('a', array(), false)->href($href)->rel($this->rel);
        $out = (string) $link . "$img</a>";
    }

    return $this->shelve($out);
}
4794  
/**
 * Parses code blocks in the given input.
 *
 * Runs doSpecial() three times in a row: for code tags and for
 * @-delimited spans, both formatted by fCode(), and then for pre
 * tags, formatted by fPre().
 *
 * @param  string $text The input
 * @return string Processed text
 */

protected function code($text)
{
    $passes = array(
        array('<code>', '</code>', 'fCode'),
        array('@', '@', 'fCode'),
        array('<pre>', '</pre>', 'fPre'),
    );

    foreach ($passes as $pass) {
        $text = $this->doSpecial($text, $pass[0], $pass[1], $pass[2]);
    }

    return $text;
}
4809  
/**
 * Formats inline code tags.
 *
 * The content is encoded with rEncodeHTML(), wrapped in a code tag
 * and shelved. What was matched before and after the code is kept
 * around the resulting token.
 *
 * @param  array  $m Named regular expression parts from doSpecial()
 * @return string
 */

protected function fCode($m)
{
    $parts = $this->getSpecialOptions($m);
    $token = $this->shelve('<code>'.$this->rEncodeHTML($parts['content']).'</code>');

    return $parts['before'].$token.$parts['after'];
}
4823  
/**
 * Formats pre tags.
 *
 * Only the encoded content is shelved, the pre tags themselves are
 * written around the resulting token.
 *
 * @param  array  $m Named regular expression parts from doSpecial()
 * @return string
 */

protected function fPre($m)
{
    $parts = $this->getSpecialOptions($m);
    $token = $this->shelve($this->rEncodeHTML($parts['content']));

    return $parts['before'].'<pre>'.$token.'</pre>'.$parts['after'];
}
4837  
/**
 * Shelves parsed content.
 *
 * Stores away a fragment of the source text that has been parsed
 * and requires no more processing. The fragment is kept in
 * $this->shelf under a token made of $this->uid, the running
 * reference index and ':shelve'.
 *
 * @param  string $val The content
 * @return string The fragment's unique reference ID
 * @see    Parser::retrieve()
 */

protected function shelve($val)
{
    $token = $this->uid.$this->refIndex.':shelve';
    $this->refIndex++;
    $this->shelf[$token] = $val;

    return $token;
}
4855  
/**
 * Replaces reference tokens with corresponding shelved content.
 *
 * This method puts all shelved content back to the final, parsed
 * input. The replacement is repeated until the text stops changing,
 * so tokens that appear inside shelved content are resolved too.
 *
 * @param  string $text The input
 * @return string Processed text
 * @see    Parser::shelve()
 */

protected function retrieve($text)
{
    if (!$this->shelf) {
        return $text;
    }

    // The shelf does not change while looping, list its tokens once.
    $tokens = array_keys($this->shelf);

    do {
        $previous = $text;
        $text = str_replace($tokens, $this->shelf, $text);
        // Strict check: a loose one compares numeric strings by value.
    } while ($text !== $previous);

    return $text;
}
4878  
/**
 * Removes BOM and unifies line endings in the given input.
 *
 * @param  string $text Input Textile
 * @return string Cleaned version of the input
 */

protected function cleanWhiteSpace($text)
{
    $steps = array(
        // Removes a leading byte order mark, and any 0x1A character.
        "/^\xEF\xBB\xBF|\x1A/" => '',
        // Replaces CRLF and CR with single LF.
        "/\r\n?/" => "\n",
        // Removes leading tabs and spaces, if the line is otherwise empty.
        "/^[ \t]*\n/m" => "\n",
    );

    foreach ($steps as $regex => $replacement) {
        $text = (string) preg_replace($regex, $replacement, $text);
    }

    // Removes leading and ending blank lines.
    return trim($text, "\n");
}
4898  
/**
 * Removes any unique tokens from the input.
 *
 * Every occurrence of $this->uid is deleted from the text.
 *
 * @param  string $text The input to clean
 * @return string Cleaned input
 * @since  3.5.5
 */

protected function cleanUniqueTokens($text)
{
    $marker = $this->uid;

    return str_replace($marker, '', $text);
}
4911  
/**
 * Uses the specified callback method to format the content between start and end nodes.
 *
 * The start node has to sit at the beginning of a line, or follow
 * whitespace or one of the characters "|", "[", "(", "{" and ">".
 * The callback receives the named parts 'before', 'content' and
 * 'after', the last one being an optional closing square bracket.
 *
 * @param  string $text   The input to format
 * @param  string $start  The start node to look for
 * @param  string $end    The end node to look for
 * @param  string $method The callback method
 * @return string Processed input
 */

protected function doSpecial($text, $start, $end, $method)
{
    $opening = preg_quote($start, '/');
    $closing = preg_quote($end, '/');

    $regex = '/(?P<before>^|\s|[|[({>])'
        .$opening.'(?P<content>.*?)'.$closing
        .'(?<after>\]?)/ms';

    return (string) preg_replace_callback($regex, array($this, $method), $text);
}
4932  
/**
 * Gets an array of processed special options.
 *
 * If the 'before' and 'after' parts are a matching pair from
 * $this->spanWrappers, both are emptied.
 *
 * @param  array $m Options
 * @return array
 * @since  3.7.2
 */

protected function getSpecialOptions($m)
{
    foreach ($this->spanWrappers as $open => $close) {
        if ($m['before'] !== $open || $m['after'] !== $close) {
            continue;
        }

        $m['before'] = '';
        $m['after'] = '';
        break;
    }

    return $m;
}
4953  
/**
 * Parses notextile tags in the given input.
 *
 * Handles notextile tags first, and content wrapped in double
 * equals signs after that. Both are formatted by fTextile().
 *
 * @param  string $text The input
 * @return string Processed input
 */

protected function noTextile($text)
{
    $delimiters = array(
        '<notextile>' => '</notextile>',
        '=='          => '==',
    );

    foreach ($delimiters as $open => $close) {
        $text = $this->doSpecial($text, $open, $close, 'fTextile');
    }

    return $text;
}
4966  
/**
 * Formats notextile blocks.
 *
 * The content is shelved as is, without any encoding.
 *
 * @param  array  $m Named regular expression parts from doSpecial()
 * @return string
 */

protected function fTextile($m)
{
    $parts = $this->getSpecialOptions($m);
    $token = $this->shelve($parts['content']);

    return $parts['before'].$token.$parts['after'];
}
4980  
/**
 * Parses footnote reference links in the given input.
 *
 * This method replaces [n] instances with links. The reference has
 * to follow a non-whitespace character, and an exclamation mark
 * after the number, as in [n!], asks for a reference without a link.
 *
 * @param  string $text The input
 * @return string $text Processed input
 * @see    Parser::footnoteID()
 */

protected function footnoteRefs($text)
{
    $digit = $this->regex_snippets['digit'];
    $space = $this->regex_snippets['space'];

    $regex = '/(?<=\S)\[(?P<id>'.$digit.'+)(?P<nolink>!?)\]'.$space.'?/U'
        .$this->regex_snippets['mod'];

    return (string) preg_replace_callback($regex, array($this, 'footnoteID'), $text);
}
5000  
/**
 * Renders a footnote reference link or ID.
 *
 * The first reference to a footnote reserves a link ID for it in
 * $this->fn and carries the "fnrev" ID attribute. Later references
 * to the same footnote reuse the stored link ID.
 *
 * @param  array  $m Named regular expression parts, 'id' and 'nolink'
 * @return string Footnote link, or ID
 */

protected function footnoteID($m)
{
    $number = $m['id'];
    $attributes = ' class="footnote"';

    if (empty($this->fn[$number])) {
        $linkId = $this->linkPrefix.($this->linkIndex++);
        $this->fn[$number] = $linkId;
        $attributes .= " id=\"fnrev$linkId\"";
    }

    if ('!' == $m['nolink']) {
        $label = $number;
    } else {
        $label = '<a href="#fn'.$this->fn[$number].'">'.$number.'</a>';
    }

    return $this->formatFootnote($label, $attributes, false);
}
5023  
/**
 * Parses and shelves quoted quotes in the given input.
 *
 * A candidate is surrounded by spaces and consists of one of
 * $this->quote_starts, the pattern to search and one closing
 * character. Each candidate is passed to fGlyphQuotedQuote().
 *
 * @param  string $text The text to search for quoted quotes
 * @param  string $find Pattern to search
 * @return string
 * @since  3.5.4
 */

protected function glyphQuotedQuote($text, $find = '"?|"[^"]+"')
{
    $regex = '/ (?P<pre>'.$this->quote_starts.')(?P<quoted>'.$find.')(?P<post>.) /'
        .$this->regex_snippets['mod'];

    return (string) preg_replace_callback($regex, array($this, 'fGlyphQuotedQuote'), $text);
}
5041  
/**
 * Formats quoted quotes and stores them on the shelf.
 *
 * The match is returned untouched unless the closing character is
 * the one $this->quotes pairs with the opening character.
 *
 * @param  array  $m Named regular expression parts
 * @return string Input with quoted quotes removed and replaced with tokens
 * @see    Parser::glyphQuotedQuote()
 */

protected function fGlyphQuotedQuote($m)
{
    $opener = $m['pre'];
    $closer = $m['post'];

    // Check the correct closing character was found.
    if (!isset($this->quotes[$opener]) || $this->quotes[$opener] !== $closer) {
        return $m[0];
    }

    $openingGlyphs = array(
        '"' => '&#8220;',
        "'" => '&#8216;',
        ' ' => '&nbsp;',
    );

    $closingGlyphs = array(
        '"' => '&#8221;',
        "'" => '&#8217;',
        ' ' => '&nbsp;',
    );

    $inner = $m['quoted'];

    if (strlen($inner) > 1) {
        $inner = rtrim($this->glyphs($inner));
    } elseif ($inner === '"') {
        $inner = '&quot;';
    }

    return $this->shelve(' '.strtr($opener, $openingGlyphs).$inner.strtr($closer, $closingGlyphs).' ');
}
5079  
/**
 * Replaces glyphs in the given input.
 *
 * This method performs typographical glyph replacements. The input is
 * split across HTML-like tags in order to avoid attempting glyph
 * replacements within tags. Outside of restricted mode, bare
 * ampersands and angle brackets in the text parts are entity encoded
 * before the glyph rules are applied.
 *
 * @param  string $text Input Textile
 * @return string The processed input, or an empty string if a
 *                regular expression fails
 */

protected function glyphs($text)
{
    if (!$this->glyph_search) {
        return $text;
    }

    // Fix: hackish -- adds a space if final char of text is a double quote.
    $text = preg_replace('/"\z/', "\" ", $text);

    if ($text === null) {
        return '';
    }

    $segments = preg_split(
        "@(<[\w/!?].*>)@Us".$this->regex_snippets['mod'],
        $text,
        -1,
        PREG_SPLIT_DELIM_CAPTURE
    );

    if ($segments === false) {
        return '';
    }

    $out = '';

    foreach ($segments as $position => $segment) {
        // Text tag text tag text ... so text sits at the even positions.
        if ($position % 2 === 0) {
            // Raw < > & chars are already entity encoded in restricted mode
            if (!$this->isRestrictedModeEnabled()) {
                $segment = preg_replace('/&(?!#?[a-z0-9]+;)/i', '&amp;', $segment);
                $segment = str_replace(array('<', '>'), array('&lt;', '&gt;'), (string) $segment);
            }

            $segment = preg_replace($this->glyph_search, $this->glyph_replace, $segment);
        }

        $out .= $segment;
    }

    return $out;
}
5133  
/**
 * Replaces glyph references in the given input.
 *
 * This method removes the temporary markers, made of $this->uid
 * followed by ':glyph:', from the input.
 *
 * @param  string $text The input
 * @return string Processed input
 */

protected function replaceGlyphs($text)
{
    $marker = $this->uid.':glyph:';

    return str_replace($marker, '', $text);
}
5148  
/**
 * Translates alignment tag into corresponding CSS text-align property value.
 *
 * The tags are accepted both as plain characters and with their
 * angle brackets entity encoded.
 *
 * @param  string $in The Textile alignment tag
 * @return string CSS text-align value, or an empty string for an unknown tag
 */

protected function hAlign($in)
{
    $tagsByValue = array(
        'left'    => array('&lt;', '<'),
        'right'   => array('&gt;', '>'),
        'justify' => array('&lt;&gt;', '<>'),
        'center'  => array('='),
    );

    foreach ($tagsByValue as $value => $tags) {
        if (in_array($in, $tags, true)) {
            return $value;
        }
    }

    return '';
}
5170  
/**
 * Translates vertical alignment tag into corresponding CSS vertical-align property value.
 *
 * @param  string $in The Textile alignment tag, one of '^', '-' and '~'
 * @return string CSS vertical-align value, or an empty string for an unknown tag
 */

protected function vAlign($in)
{
    $map = array(
        '^' => 'top',
        '-' => 'middle',
        '~' => 'bottom',
    );

    if (!isset($map[$in])) {
        return '';
    }

    return $map[$in];
}
5188  
/**
 * Converts characters in the given input to HTML character references.
 *
 * With multi-byte string support the conversion is done by
 * mb_encode_numericentity() according to Textile's conversion map,
 * $this->cmap. Without it htmlentities() is used, quotes left alone.
 *
 * @param  string $text    The input
 * @param  string $charset The character set
 * @return string Processed input
 */

protected function encodeHigh($text, $charset = 'UTF-8')
{
    if (!$this->isMultiByteStringSupported()) {
        return htmlentities($text, ENT_NOQUOTES, $charset);
    }

    return mb_encode_numericentity($text, $this->cmap, $charset);
}
5207  
/**
 * Converts an HTML character reference to the character it stands for.
 *
 * The input is the bare reference, without the leading ampersand
 * and the closing semicolon. An integer is treated as a numeric
 * reference, anything else as a named one.
 *
 * @param  string $text    The input
 * @param  string $charset The character set
 * @return string Processed input
 */

protected function decodeHigh($text, $charset = 'UTF-8')
{
    $isNumeric = (string) intval($text) === (string) $text;
    $entity = $isNumeric ? '&#'.$text.';' : '&'.$text.';';

    if (!$this->isMultiByteStringSupported()) {
        return html_entity_decode($entity, ENT_NOQUOTES, $charset);
    }

    return mb_decode_numericentity($entity, $this->cmap, $charset);
}
5226  
/**
 * Convert special characters to HTML entities.
 *
 * Encodes the ampersand and both angle brackets, and on request
 * single and double quotes as well. In Textile this is used for
 * sanitising the input.
 *
 * @param  string $str    The string to encode
 * @param  bool   $quotes Encode quotes
 * @return string Encoded string
 * @see    htmlspecialchars()
 */

protected function encodeHTML($str, $quotes = true)
{
    // The ampersand goes first, so that it is not encoded twice.
    $search = array('&', '<', '>');
    $replace = array('&amp;', '&lt;', '&gt;');

    if ($quotes) {
        // Numeric, as in htmlspecialchars
        $search[] = "'";
        $replace[] = '&#39;';

        $search[] = '"';
        $replace[] = '&quot;';
    }

    return str_replace($search, $replace, $str);
}
5257  
/**
 * Convert special characters to HTML entities.
 *
 * This is identical to encodeHTML(), but takes restricted mode into
 * account. When in restricted mode, only double quotes are escaped
 * and the $quotes option has no effect.
 *
 * @param  string $str    The string to encode
 * @param  bool   $quotes Encode quotes
 * @return string Encoded string
 * @see    Parser::encodeHTML()
 */

protected function rEncodeHTML($str, $quotes = true)
{
    // In restricted mode, all input but quotes has already been escaped
    return $this->isRestrictedModeEnabled()
        ? str_replace('"', '&quot;', $str)
        : $this->encodeHTML($str, $quotes);
}
5280  
/**
 * Whether the mbstring extension is available.
 *
 * The check, whether mb_strlen() can be called, is done once and its
 * result is remembered in $this->mb.
 *
 * @return bool
 * @since  3.5.5
 */

protected function isMultiByteStringSupported()
{
    if ($this->mb !== null) {
        return $this->mb;
    }

    return $this->mb = is_callable('mb_strlen');
}
5296  
/**
 * Whether PCRE supports UTF-8.
 *
 * Tries a Unicode property match in UTF-8 mode. Errors raised by
 * the attempt are suppressed, a failure just counts as unsupported.
 *
 * @return bool
 * @since  3.5.5
 */

protected function isUnicodePcreSupported()
{
    $matched = @preg_match('/\pL/u', 'a');

    return (bool) $matched;
}
5308  }