includes/wikiengine/parse_mediawiki.php
changeset 1044 ad6a22377507
parent 1031 8a4b75e73137
child 1054 e6b14d33ac55
equal deleted inserted replaced
1043:ac1ecaacb48e 1044:ad6a22377507
   131       // second pass, separate items and tokens
   131       // second pass, separate items and tokens
   132       unset($items_pre);
   132       unset($items_pre);
   133       foreach ( $items as $item )
   133       foreach ( $items as $item )
   134       {
   134       {
   135         // get the depth
   135         // get the depth
   136         list($itemtoken) = explode(' ', $item);
   136         $itemtoken = preg_replace('/[^#:\*].*$/', '', $item);
   137         // get the text
   137         // get the text
   138         $itemtext = trim(substr($item, strlen($itemtoken)));
   138         $itemtext = trim(substr($item, strlen($itemtoken)));
   139         $piece['items'][] = array(
   139         $piece['items'][] = array(
   140             // depth starts at 1
   140             // depth starts at 1
   141             'depth' => strlen($itemtoken),
   141             'depth' => strlen($itemtoken),
   151     return $pieces;
   151     return $pieces;
   152   }
   152   }
   153   
   153   
   154   public function paragraph(&$text)
   154   public function paragraph(&$text)
   155   {
   155   {
       
   156     // The trick with paragraphs is to not turn things into them when a block level element already wraps the block of text.
       
   157     // First we need a list of block level elements (http://htmlhelp.com/reference/html40/block.html + some Enano extensions)
       
   158     $blocklevel = 'address|blockquote|center|code|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|li|ol|p|pre|table|ul|tr|td|th|tbody|thead|tfoot';
       
   159     
       
   160     // Wrap all block level tags
       
   161     $text = preg_replace("/<($blocklevel)(?: .+?>|>)(?:(?R)|.*?)<\/\\1>/s", '<_paragraph_bypass>$0</_paragraph_bypass>', $text);
       
   162     
   156     // This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags
   163     // This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags
   157     // to prevent the paragraph parser from interfering with pretty HTML generated elsewhere.
   164     // to prevent the paragraph parser from interfering with pretty HTML generated elsewhere.
   158     RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
   165     RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
   159     
   166     
   160     // The trick with paragraphs is to not turn things into them when a block level element already wraps the block of text.
   167     $startcond = "(?!(?:[\\r\\n]|\{_paragraph_bypass:[a-f0-9]{32}:[0-9]+\}|[ ]*<\/?(?:$blocklevel)(?: .+>|>)))";
   161     // First we need a list of block level elements (http://htmlhelp.com/reference/html40/block.html)
   168     $regex = "/^
   162     $blocklevel = 'address|blockquote|center|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|ol|p|pre|table|ul';
   169                 $startcond        # line start condition - do not match if the line starts with the condition above
   163     
   170                 .+?               # body text
   164     $regex = "/^(
       
   165                 (?:(?!(?:\\n|[ ]*<(?:{$blocklevel}))))    # condition for starting paragraph: not a newline character or block level element
       
   166                 .+?                                       # body text
       
   167                 (?:
   171                 (?:
   168                   \\n                                     # additional lines in the para
   172                   \\n             # additional lines
   169                   (?:(?!(?:\\n|[ ]*<(?:{$blocklevel}))))  # make sure of only one newline in a row, and no block level elements
   173                   $startcond      # make sure of only one newline in a row, and end the paragraph if a new line fails the start condition
   170                   .*?
   174                   .*?
   171                 )*
   175                 )*                # keep going until it fails
   172               )$
   176               $
   173               /mx";
   177               /mx";
   174     
   178     
   175     if ( !preg_match_all($regex, $text, $matches) )
   179     if ( !preg_match_all($regex, $text, $matches) )
       
   180     {
       
   181       RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);
   176       return array();
   182       return array();
       
   183     }
   177     
   184     
   178     // Debugging :)
   185     // Debugging :)
   179     // die('<pre>' . htmlspecialchars(print_r($matches, true)) . '</pre>');
   186     // die('<pre>' . htmlspecialchars($text) . "\n-----------------------------------------------------------\n" . htmlspecialchars(print_r($matches, true)) . '</pre>');
   180     
   187     
   181     // restore stripped
   188     // restore stripped
   182     RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);
   189     RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);
   183     
   190     
   184     // tokenize
   191     // tokenize