More parser work: fixed a few bugs with [[intlinks]] in headers, fixed a bug that caused the paragraph parser to return an empty string, and added a warning plus backup-and-restore for when a render stage returns an empty string.
--- a/includes/wikiengine/TagSanitizer.php Wed Sep 30 20:01:23 2009 -0400
+++ b/includes/wikiengine/TagSanitizer.php Sun Oct 04 03:59:38 2009 -0400
@@ -29,7 +29,7 @@
($space*=$space*
(?:
# The attribute value: quoted or alone
- ".'"'."([^<".'"'."]*)".'"'."
+ \"([^<\"]*)\"
| '([^<']*)'
| ([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+)
| (\#[0-9a-fA-F]+) # Technically wrong, but lots of
--- a/includes/wikiengine/parse_mediawiki.php Wed Sep 30 20:01:23 2009 -0400
+++ b/includes/wikiengine/parse_mediawiki.php Sun Oct 04 03:59:38 2009 -0400
@@ -182,7 +182,31 @@
// Wrap all block level tags
RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
- $text = preg_replace("/<($blocklevel)(?: .+?>|>)(?:(?R)|.*?)<\/\\1>/s", '<_paragraph_bypass>$0</_paragraph_bypass>', $text);
+ // I'm not sure why I had to spell out all of these alternatives. Trying to collapse
+ // them into one by making subpatterns optional with "?" was causing the match to return
+ // empty and throwing errors in the parser. Eventually, around 3:57 AM, I just settled on
+ // this motherf---er of a regular expression.
+ $regex = ";
+ <($blocklevel)
+ (?:
+ # self closing, no attributes
+ [ ]*/>
+ |
+ # self closing, attributes
+ [ ][^>]+? />
+ |
+ # with inner text, no attributes
+ >
+ (?: (?R) | .*? )*</\\1>
+ |
+ # with inner text and attributes
+ [ ][^>]+? # attributes
+ >
+ (?: (?R) | .*? )*</\\1>
+ )
+ ;sx";
+
+ $text = preg_replace($regex, '<_paragraph_bypass>$0</_paragraph_bypass>', $text);
RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw, true);
// This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags
--- a/includes/wikiengine/render_xhtml.php Wed Sep 30 20:01:23 2009 -0400
+++ b/includes/wikiengine/render_xhtml.php Sun Oct 04 03:59:38 2009 -0400
@@ -29,6 +29,8 @@
foreach ( $pieces as $i => $piece )
{
$tocid = sanitize_page_id(trim($piece['text']));
+ // (bad) workaround for links in headings: strip the square brackets from the heading id
+ $tocid = str_replace(array('[', ']'), '', $tocid);
$tag = '<h' . $piece['level'] . ' id="head:' . $tocid . '">';
$tag .= trim($piece['text']);
$tag .= '</h' . $piece['level'] . '>';
--- a/includes/wikiformat.php Wed Sep 30 20:01:23 2009 -0400
+++ b/includes/wikiformat.php Sun Oct 04 03:59:38 2009 -0400
@@ -152,7 +152,14 @@
}
// execute rule
+ $text_before = $text;
$text = $this->perform_render_step($text, $rule, $parser, $renderer);
+ if ( empty($text) )
+ {
+ trigger_error("Wikitext was empty after rule \"$rule\"; restoring backup", E_USER_WARNING);
+ $text = $text_before;
+ }
+ unset($text_before);
// run posthooks
foreach ( $this->hooks as $hook )