Wiki engine: improved behavior in block level element finding/wrapping algorithm
Wed, 08 Jul 2009 18:52:41 -0400 (2009-07-08)
changeset 1044 ad6a22377507
parent 1043 ac1ecaacb48e
child 1045 ce069a06906c
Wiki engine: improved behavior in block level element finding/wrapping algorithm
--- a/includes/render.php	Wed Jul 08 18:51:48 2009 -0400
+++ b/includes/render.php	Wed Jul 08 18:52:41 2009 -0400
@@ -193,47 +193,41 @@
     // this is still needed by parser plugins
     $random_id = md5( time() . mt_rand() );
-    // Strip out <nowiki> sections and PHP code
+    // Strip out <nowiki> sections
     self::nowiki_strip($text, $nowiki_stripped);
+    // Run early parsing plugins
     $code = $plugins->setHook('render_wikiformat_veryearly');
     foreach ( $code as $cmd )
+    // Strip out embedded PHP
     self::php_strip($text, $php_stripped);
+    // Perform render through the engine
     $carpenter = new Carpenter();
     $carpenter->flags = $flags;
     $carpenter->hook(array(__CLASS__, 'hook_pre'), PO_AFTER, 'lang');
     $carpenter->hook(array(__CLASS__, 'hook_posttemplates'), PO_AFTER, 'templates');
     if ( $flags & RENDER_WIKI_TEMPLATE )
-      // FIXME: process noinclude/nodisplay
+      // FIXME: Where is noinclude/nodisplay being processed in the pipeline? (Seems to be processed, but not here)
     $text = $carpenter->render($text);
     // For plugin compat
     $result =& $text;
+    // Post processing hook
     $code = $plugins->setHook('render_wikiformat_post');
     foreach ( $code as $cmd )
-    /*
-    $text = preg_replace('/<noinclude>(.*?)<\/noinclude>/is', '\\1', $text);
-    if ( $paths->namespace == 'Template' )
-    {
-      $text = preg_replace('/<nodisplay>(.*?)<\/nodisplay>/is', '', $text);
-    }
-    $text = process_tables($text);
-    */
+    // Add PHP and nowiki back in
     self::nowiki_unstrip($text, $nowiki_stripped);
     self::php_unstrip($text, $php_stripped);
--- a/includes/wikiengine/parse_mediawiki.php	Wed Jul 08 18:51:48 2009 -0400
+++ b/includes/wikiengine/parse_mediawiki.php	Wed Jul 08 18:52:41 2009 -0400
@@ -133,7 +133,7 @@
       foreach ( $items as $item )
         // get the depth
-        list($itemtoken) = explode(' ', $item);
+        $itemtoken = preg_replace('/[^#:\*].*$/', '', $item);
         // get the text
         $itemtext = trim(substr($item, strlen($itemtoken)));
         $piece['items'][] = array(
@@ -153,30 +153,37 @@
   public function paragraph(&$text)
+    // The trick with paragraphs is to not turn things into them when a block level element already wraps the block of text.
+    // First we need a list of block-level elements (plus some Enano extensions)
+    $blocklevel = 'address|blockquote|center|code|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|li|ol|p|pre|table|ul|tr|td|th|tbody|thead|tfoot';
+    // Wrap all block level tags
+    $text = preg_replace("/<($blocklevel)(?: .+?>|>)(?:(?R)|.*?)<\/\\1>/s", '<_paragraph_bypass>$0</_paragraph_bypass>', $text);
     // This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags
     // to prevent the paragraph parser from interfering with pretty HTML generated elsewhere.
     RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
-    // The trick with paragraphs is to not turn things into them when a block level element already wraps the block of text.
-    // First we need a list of block level elements (
-    $blocklevel = 'address|blockquote|center|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|ol|p|pre|table|ul';
-    $regex = "/^(
-                (?:(?!(?:\\n|[ ]*<(?:{$blocklevel}))))    # condition for starting paragraph: not a newline character or block level element
-                .+?                                       # body text
+    $startcond = "(?!(?:[\\r\\n]|\{_paragraph_bypass:[a-f0-9]{32}:[0-9]+\}|[ ]*<\/?(?:$blocklevel)(?: .+>|>)))";
+    $regex = "/^
+                $startcond        # line start condition - do not match if the line starts with the condition above
+                .+?               # body text
-                  \\n                                     # additional lines in the para
-                  (?:(?!(?:\\n|[ ]*<(?:{$blocklevel}))))  # make sure of only one newline in a row, and no block level elements
+                  \\n             # additional lines
+                  $startcond      # make sure of only one newline in a row, and end the paragraph if a new line fails the start condition
-                )*
-              )$
+                )*                # keep going until it fails
+              $
     if ( !preg_match_all($regex, $text, $matches) )
+    {
+      RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);
       return array();
+    }
     // Debugging :)
-    // die('<pre>' . htmlspecialchars(print_r($matches, true)) . '</pre>');
+    // die('<pre>' . htmlspecialchars($text) . "\n-----------------------------------------------------------\n" . htmlspecialchars(print_r($matches, true)) . '</pre>');
     // restore stripped
     RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);
--- a/includes/wikiengine/render_xhtml.php	Wed Jul 08 18:51:48 2009 -0400
+++ b/includes/wikiengine/render_xhtml.php	Wed Jul 08 18:52:41 2009 -0400
@@ -60,7 +60,7 @@
           $itag = 'dd';
-      $list = "<$btag><_paragraph_bypass>\n";
+      $list = "<$btag>\n";
       $spacing = '';
       $depth = 1;
       foreach ( $piece['items'] as $j => $item )
@@ -108,7 +108,7 @@
         $spacing = substr($spacing, 4);
-      $list .= "</_paragraph_bypass></$btag>\n";
+      $list .= "</$btag>\n";
       $text = str_replace(Carpenter::generate_token($i), $list, $text);
     return $text;