20 'externalwithtext' => '#\[((?:https?|irc|ftp)://.+?) (.+?)\]#', |
20 'externalwithtext' => '#\[((?:https?|irc|ftp)://.+?) (.+?)\]#', |
21 'externalnotext' => '#\[((?:https?|irc|ftp)://.+?)\]#', |
21 'externalnotext' => '#\[((?:https?|irc|ftp)://.+?)\]#', |
22 'mailtonotext' => '#\[mailto:([^ \]]+?)\]#', |
22 'mailtonotext' => '#\[mailto:([^ \]]+?)\]#', |
23 'mailtowithtext' => '#\[mailto:([^ \]]+?) (.+?)\]#', |
23 'mailtowithtext' => '#\[mailto:([^ \]]+?) (.+?)\]#', |
24 'hr' => '/^[-]{4,} *$/m', |
24 'hr' => '/^[-]{4,} *$/m', |
25 'code' => '/^<code>(?:\r?\n)?(.+?)(?:\r?\n)?<\/code>$/mis' |
25 'code' => '/^(?:<code>(?:\r?\n)?|<pre>)(.+?)(?:<\/pre>|(?:\r?\n)?<\/code>)$/mis' |
26 ); |
26 ); |
27 |
27 |
28 private $blockquote_rand_id; |
28 private $blockquote_rand_id; |
29 |
29 |
30 public function lang(&$text) |
30 public function lang(&$text) |
184 $blocklevel = 'address|blockquote|center|code|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|li|ol|p|pre|table|ul|tr|td|th|tbody|thead|tfoot'; |
184 $blocklevel = 'address|blockquote|center|code|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|li|ol|p|pre|table|ul|tr|td|th|tbody|thead|tfoot'; |
185 |
185 |
186 // Wrap all block level tags |
186 // Wrap all block level tags |
187 RenderMan::tag_strip('_paragraph_bypass', $text, $_nw); |
187 RenderMan::tag_strip('_paragraph_bypass', $text, $_nw); |
188 |
188 |
189 // I'm not sure why I had to go through all these alternatives. Trying to bring it |
189 // Find all opening and closing tags |
190 // all down to one by ?'ing subpatterns was causing things to return empty and throwing |
190 |
191 // errors in the parser. Eventually, around ~3:57AM I just settled on this motherf---er |
191 $regex = ";(<(?:/(?:$blocklevel)|(?:$blocklevel)(?: [^>]*?)?)>);s"; |
192 // of a regular expression. |
|
193 |
|
194 // FIXME: This regexp triggers a known PHP stack size issue under win32 and possibly |
|
195 // other platforms (<http://bugs.php.net/bug.php?id=47689>). The workaround is going to |
|
196 // involve writing our own parser that takes care of recursion without using the stack, |
|
197 // which is going to be a bitch, and may not make it in until Caoineag RCs. |
|
198 |
|
199 $regex = "; |
|
200 <($blocklevel) |
|
201 (?: |
|
202 # self closing, no attributes |
|
203 [ ]*/> |
|
204 | |
|
205 # self closing, attributes |
|
206 [ ][^>]+? /> |
|
207 | |
|
208 # with inner text, no attributes |
|
209 > |
|
210 (?: (?R) | .*? )*</\\1> |
|
211 | |
|
212 # with inner text and attributes |
|
213 [ ][^>]+? # attributes |
|
214 > |
|
215 (?: (?R) | .*? )*</\\1> |
|
216 ) |
|
217 ;sx"; |
|
218 |
192 |
219 // oh. and we're using this tokens thing because for identical matches, the first match will |
193 // oh. and we're using this tokens thing because for identical matches, the first match will |
220 // get wrapped X number of times instead of all matches getting wrapped once; replacing each |
194 // get wrapped X number of times instead of all matches getting wrapped once; replacing each |
221 // with a unique token id remedies this |
195 // with a unique token id remedies this |
222 |
196 |
223 $tokens = array(); |
197 $tokens = array(); |
224 $rand_id = sha1(microtime() . mt_rand()); |
198 $rand_id = sha1(microtime() . mt_rand()); |
225 |
199 $tag_stack = array(); |
226 // Temporary hack to fix crashes under win32. Sometime I'll write a loop based |
200 |
227 // parser for this whole section. Maybe. Perhaps the Apache folks will fix their |
201 if ( $text_split = preg_split($regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE) ) |
228 // Windows binaries first. |
202 { |
229 if ( PHP_OS == 'WIN32' || PHP_OS == 'WINNT' ) |
203 $text = ''; |
230 { |
204 // go through the text, extract tag names, and push them to a stack. |
231 $regex = str_replace("(?: (?R) | .*? )*", "(?: .*? )", $regex); |
205 foreach ( $text_split as $splitpart ) |
232 } |
206 { |
233 if ( preg_match_all($regex, $text, $matches) ) |
207 if ( preg_match(";^<(/)?($blocklevel)( |>);i", $splitpart, $match) ) |
234 { |
208 { |
235 foreach ( $matches[0] as $i => $match ) |
209 $tagname = $match[2]; |
236 { |
210 if ( $match[1] == '/' ) |
237 $text = str_replace_once($match, "{_pb_:$rand_id:$i}", $text); |
211 { |
238 $tokens[$i] = '<_paragraph_bypass>' . $match . '</_paragraph_bypass>'; |
212 // closing tag |
239 } |
213 if ( $tagname != ($top = array_pop($tag_stack)) ) |
240 } |
214 { |
241 |
215 // invalid - push back |
242 foreach ( $tokens as $i => $match ) |
216 array_push($tag_stack, $top); |
243 { |
217 } |
244 $text = str_replace_once("{_pb_:$rand_id:$i}", $match, $text); |
218 else |
245 } |
219 { |
|
220 // valid - if stack's at zero, add a </_paragraph_bypass> |
|
221 if ( count($tag_stack) == 0 ) |
|
222 $splitpart .= '</_paragraph_bypass>'; |
|
223 } |
|
224 } |
|
225 else |
|
226 { |
|
227 // push |
|
228 array_push($tag_stack, $tagname); |
|
229 if ( count($tag_stack) == 1 ) |
|
230 $splitpart = '<_paragraph_bypass>' . $splitpart; |
|
231 } |
|
232 } |
|
233 $text .= $splitpart; |
|
234 } |
|
235 //echo '<pre>' . htmlspecialchars(print_r($text, true)) . '</pre>'; |
|
236 } |
|
237 |
|
238 // All things that should be para-bypassed now are surrounded by _paragraph_bypass tags. |
246 |
239 |
247 // die('<pre>' . htmlspecialchars($text) . '</pre>'); |
240 // die('<pre>' . htmlspecialchars($text) . '</pre>'); |
248 |
241 |
249 RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw, true); |
242 RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw, true); |
250 |
243 |