<?php

/*
 * Enano - an open-source CMS capable of wiki functions, Drupal-like sidebar blocks, and everything in between
 * Copyright (C) 2006-2009 Dan Fuhry
 *
 * This program is Free Software; you can redistribute and/or modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for details.
 */

/**
 * MediaWiki-style wikitext parse rules.
 *
 * Simple rules are plain regular expressions listed in $rules. Complex rules are
 * methods that receive the text by reference, may rewrite it in place, and return
 * the pieces they extracted.
 */

class Carpenter_Parse_MediaWiki
{
  /**
   * Regex-only rules, keyed by rule name.
   * @var array
   */
  
  public $rules = array(
    'bold'             => "/'''(.+?)'''/",
    'italic'           => "/''(.+?)''/",
    'underline'        => '/__(.+?)__/',
    'externalwithtext' => '#\[((?:https?|irc|ftp)://.+?) (.+?)\]#',
    'externalnotext'   => '#\[((?:https?|irc|ftp)://.+?)\]#',
    'hr'               => '/^[-]{4,} *$/m'
  );
  
  /**
   * Random ID used in the {blockquote:ID} markers written by the last blockquote() call.
   * @var string
   */
  
  private $blockquote_rand_id;
  
  /**
   * Resolves <lang code="xx">...</lang> (or id="xx") sections: the inner text is kept when
   * the code equals the global $lang object's lang_code, otherwise the whole section is removed.
   * @param string Text to process; modified in place
   * @return array Always empty
   */
  
  public function lang(&$text)
  {
    global $lang;
    
    preg_match_all('/<lang (?:code|id)="([a-z0-9_-]+)">([\w\W]+?)<\/lang>/', $text, $langmatch);
    foreach ( $langmatch[0] as $i => $match )
    {
      // Strict string comparison: a loose == would treat numeric-looking codes such as
      // "1e1" and "10" as equal.
      $keep = ( $langmatch[1][$i] === (string) $lang->lang_code );
      $text = str_replace_once($match, $keep ? $langmatch[2][$i] : '', $text);
    }
    
    return array();
  }
  
  /**
   * Expands {{template}} inclusions by repeatedly calling RenderMan::include_templates().
   * At most four passes are made so that templates including each other cannot loop forever.
   * @param string Text to process; modified in place
   * @return array Always empty
   */
  
  public function templates(&$text)
  {
    $template_regex = "/\{\{(.+)((\n|\|[ ]*([A-z0-9]+)[ ]*=[ ]*(.+))*)\}\}/isU";
    for ( $pass = 1; $pass < 5; $pass++ )
    {
      if ( !preg_match($template_regex, $text) )
        break;
      
      $expanded = RenderMan::include_templates($text);
      // Nothing changed, so further passes would produce the same result
      // (assumes include_templates() is deterministic for a given input).
      if ( $expanded === $text )
        break;
      
      $text = $expanded;
    }
    
    return array();
  }
  
  /**
   * Finds headings written as "== Title ==" (one to six equals signs on each side, trailing
   * spaces tolerated) and passes the matched lines to Carpenter::tokenize().
   * @param string Text to process; modified in place
   * @return array List of array('level' => int, 'text' => string), in document order
   */
  
  public function heading(&$text)
  {
    if ( !preg_match_all('/^(={1,6}) *(.+?) *\\1 *$/m', $text, $results) )
      return array();
    
    $headings = array();
    foreach ( $results[0] as $i => $match )
    {
      $headings[] = array(
          'level' => strlen($results[1][$i]),
          'text'  => $results[2][$i]
        );
    }
    
    $text = Carpenter::tokenize($text, $results[0]);
    
    return $headings;
  }
  
  /**
   * Finds bulleted (*), numbered (#) and indented (:) lists, including items that continue on
   * following lines indented by two or more spaces, and passes them to Carpenter::tokenize().
   * @param string Text to process; modified in place
   * @return array List of array('type' => string, 'items' => array(array('depth' => int, 'text' => string), ...))
   */
  
  public function multilist(&$text)
  {
    // Match entire lists
    $regex = '/^
                ([:#\*])+      # Initial list delimiter
                [ ]*
                .+?
                (?:
                  \r?\n
                  (?:\\1|[ ]{2,})
                  [ ]*
                  .+?)*
                $/mx';
    
    if ( !preg_match_all($regex, $text, $lists) )
      return array();
    
    $types = array(
        '*' => 'unordered',
        '#' => 'ordered',
        ':' => 'indent'
      );
    
    $pieces = array();
    foreach ( $lists[0] as $i => $list )
    {
      // The capture group is repeated, so this is the LAST delimiter character of the
      // first line (e.g. "#" for "*# item").
      $token = $lists[1][$i];
      $piece = array(
          'type'  => $types[$token],
          'items' => array()
        );
      
      // convert windows newlines to unix
      $list = str_replace("\r\n", "\n", $list);
      
      // first pass, go through and combine items that are newlined
      $items = array();
      foreach ( explode("\n", $list) as $line )
      {
        // The first line is always an item of its own. With mixed delimiters ("*# item")
        // it does not start with $token, and treating it as a continuation would write to
        // a nonexistent previous item.
        if ( count($items) === 0 || substr($line, 0, 1) == $token )
        {
          $items[] = $line;
        }
        else
        {
          // continuation of the previous item; append it on a new line
          $items[ count($items) - 1 ] .= "\n" . trim($line);
        }
      }
      
      // second pass, separate items and tokens
      foreach ( $items as $item )
      {
        // depth starts at 1: it is the number of leading delimiter characters
        $depth = strspn($item, '#:*');
        $piece['items'][] = array(
            'depth' => $depth,
            'text'  => trim(substr($item, $depth))
          );
      }
      $pieces[] = $piece;
    }
    
    $text = Carpenter::tokenize($text, $lists[0]);
    
    return $pieces;
  }
  
  /**
   * Rewrites runs of lines starting with ">" into {blockquote:ID} ... {/blockquote:ID} markers,
   * where ID is a freshly generated random string. One level of ">" is removed per loop
   * iteration, so nested quotes (">>") end up as nested markers. The ID is stored for
   * blockquotepost(). This method has no return value.
   * @param string Text to process; modified in place
   */
  
  public function blockquote(&$text)
  {
    $rand_id = hexencode(AESCrypt::randkey(16), '', '');
    
    while ( preg_match_all('/^(?:(>+) *.+(?:\r?\n|$))+/m', $text, $quotes) )
    {
      foreach ( $quotes[0] as $quote )
      {
        // strip one level of quoting from every line of the block
        $piece = trim(preg_replace('/^> */m', '', $quote));
        $text = str_replace_once($quote, "{blockquote:$rand_id}\n$piece\n{/blockquote:$rand_id}\n", $text);
      }
    }
    
    $this->blockquote_rand_id = $rand_id;
  }
  
  /**
   * Returns the random ID stored by the last blockquote() call.
   * @param string Text being processed; not modified
   * @return string The marker ID, or null if blockquote() has not run yet
   */
  
  public function blockquotepost(&$text)
  {
    return $this->blockquote_rand_id;
  }
  
  /**
   * Finds runs of consecutive lines that should become paragraphs, i.e. lines that are not
   * blank, are not a {_paragraph_bypass:...} placeholder and do not open or close a block
   * level tag, and passes them to Carpenter::tokenize().
   * @param string Text to process; modified in place
   * @return array The matched paragraph strings
   */
  
  public function paragraph(&$text)
  {
    // The trick with paragraphs is to not turn things into them when a block level element already wraps the block of text.
    // First we need a list of block level elements (http://htmlhelp.com/reference/html40/block.html + some Enano extensions)
    $blocklevel = 'address|blockquote|center|code|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|li|ol|p|pre|table|ul|tr|td|th|tbody|thead|tfoot';
    
    // Wrap all block level tags in <_paragraph_bypass>. Bypass sections already present are
    // stripped first (presumably replaced by placeholders - see RenderMan::tag_strip) and
    // restored afterwards.
    RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
    // The four alternatives are deliberately kept separate: collapsing them into one with
    // optional subpatterns made the expression return empty results.
    $regex = ";
              <($blocklevel)
              (?:
                  # self closing, no attributes
                  [ ]*/>
                |
                  # self closing, attributes
                  [ ][^>]+? />
                |
                  # with inner text, no attributes
                  >
                  (?: (?R) | .*? )*</\\1>
                |
                  # with inner text and attributes
                  [ ][^>]+? # attributes
                  >
                  (?: (?R) | .*? )*</\\1>
              )
              ;sx";
    
    // preg_replace() returns NULL on a PCRE error (e.g. backtrack limit hit by the recursive
    // pattern). Keep the text as it was rather than wiping it out.
    $wrapped = preg_replace($regex, '<_paragraph_bypass>$0</_paragraph_bypass>', $text);
    if ( is_string($wrapped) )
    {
      $text = $wrapped;
    }
    RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw, true);
    
    // This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags
    // to prevent the paragraph parser from interfering with pretty HTML generated elsewhere.
    RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
    
    // The tag name must be followed by whitespace, "/" or ">". This has to be spelled as a
    // character class because the pattern is compiled with the x modifier, which ignores a
    // bare space; without it, tags such as <picture> or <param> would count as "<p".
    $startcond = "(?!(?:[\\r\\n]|\{_paragraph_bypass:[a-f0-9]{32}:[0-9]+\}|[ ]*<\/?(?:$blocklevel)(?:[ \\t\/].*>|>)))";
    $regex = "/^
                $startcond        # line start condition - do not match if the line starts with the condition above
                .+?               # body text
                (?:
                  \\n             # additional lines
                  $startcond      # make sure of only one newline in a row, and end the paragraph if a new line fails the start condition
                  .*?
                )*                # keep going until it fails
              $
              /mx";
    
    if ( !preg_match_all($regex, $text, $matches) )
    {
      RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);
      return array();
    }
    
    // restore stripped
    RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);
    
    // tokenize
    $text = Carpenter::tokenize($text, $matches[0]);
    
    return $matches[0];
  }
}
+ − 246
+ − 247
/**
 * Runs both image tag processing stages of RenderMan over the text.
 * $taglist is not initialized here; it is handed to the first stage and then passed on to
 * the second (presumably filled in by reference by process_image_tags - confirm against RenderMan).
 * @param string Text to process
 * @return string Result of the second stage
 */

function parser_mediawiki_xhtml_image($text)
{
  $stage_one = RenderMan::process_image_tags($text, $taglist);
  return RenderMan::process_imgtags_stage2($stage_one, $taglist);
}
+ − 253
+ − 254
/**
 * Table rendering stage: hands the text to process_tables() and returns its result unchanged.
 * @param string Text to process
 * @return mixed Whatever process_tables() returns
 */

function parser_mediawiki_xhtml_tables($text)
{
  $rendered = process_tables($text);
  return $rendered;
}
+ − 258