|
1 <?php |
|
2 |
|
3 /* |
|
4 * Enano - an open-source CMS capable of wiki functions, Drupal-like sidebar blocks, and everything in between |
|
5 * Version 1.1.6 (Caoineag beta 1) |
|
6 * Copyright (C) 2006-2008 Dan Fuhry |
|
7 * |
|
8 * This program is Free Software; you can redistribute and/or modify it under the terms of the GNU General Public License |
|
9 * as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. |
|
10 * |
|
11 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied |
|
12 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for details. |
|
13 */ |
|
14 |
|
/**
 * MediaWiki-style wikitext rules for the Carpenter parser.
 *
 * The $rules table holds plain regular expressions for the simple inline
 * rules. Every method takes the wikitext by reference, may rewrite it in
 * place, and returns an array describing what it found (or an empty array
 * when it only rewrites the text).
 */
class Carpenter_Parse_MediaWiki
{
  /**
   * Regular expressions for the inline rules, keyed by rule name. Not read
   * anywhere inside this class; presumably consumed by the Carpenter engine.
   * @var array
   */
  public $rules = array(
    'bold'             => "/'''(.+?)'''/",
    'italic'           => "/''(.+?)''/",
    'underline'        => '/__(.+?)__/',
    'externalwithtext' => '#\[((?:https?|irc|ftp)://.+?) (.+?)\]#',
    'externalnotext'   => '#\[((?:https?|irc|ftp)://.+?)\]#'
  );
  
  /**
   * Resolves <lang code="..."> / <lang id="..."> sections against the global
   * $lang object: a section whose code equals $lang->lang_code is replaced by
   * its inner text, every other section is removed.
   * @param string Wikitext, modified in place
   * @return array Always empty
   */
  public function lang(&$text)
  {
    global $lang;
    
    preg_match_all('/<lang (?:code|id)="([a-z0-9_-]+)">([\w\W]+?)<\/lang>/', $text, $langmatch);
    foreach ( $langmatch[0] as $i => $match )
    {
      if ( $langmatch[1][$i] == $lang->lang_code )
      {
        // section is in the active language: keep only its contents
        $text = str_replace_once($match, $langmatch[2][$i], $text);
      }
      else
      {
        // section is in some other language: drop it entirely
        $text = str_replace_once($match, '', $text);
      }
    }
    
    return array();
  }
  
  /**
   * Expands {{template}} inclusions by calling RenderMan::include_templates()
   * for as long as the text still contains something that looks like one.
   * @param string Wikitext, modified in place
   * @return array Always empty
   */
  public function templates(&$text)
  {
    // NOTE(review): [A-z0-9] also matches the ASCII characters between "Z" and "a"
    // ([ \ ] ^ _ `). Left unchanged on purpose: narrowing it would stop matching "_"
    // in parameter names and could disagree with include_templates() - confirm.
    $template_regex = "/\{\{(.+)((\n|\|[ ]*([A-z0-9]+)[ ]*=[ ]*(.+))*)\}\}/isU";
    $i = 0;
    while ( preg_match($template_regex, $text) )
    {
      // The counter is checked before the call, so at most four expansion
      // passes run; this bounds templates that keep including templates.
      $i++;
      if ( $i == 5 )
        break;
      $text = RenderMan::include_templates($text);
    }
    
    return array();
  }
  
  /**
   * Finds "== Heading ==" style lines (one to six "=" on both sides).
   * @param string Wikitext; the matched lines are passed through Carpenter::tokenize()
   * @return array List of array('level' => number of "=" signs, 'text' => heading text)
   */
  public function heading(&$text)
  {
    if ( !preg_match_all('/^(={1,6}) *(.+?) *\\1$/m', $text, $results) )
      return array();
    
    $headings = array();
    foreach ( $results[0] as $i => $match )
    {
      $headings[] = array(
          'level' => strlen($results[1][$i]),
          'text'  => $results[2][$i]
        );
    }
    
    $text = Carpenter::tokenize($text, $results[0]);
    
    return $headings;
  }
  
  /**
   * Finds bulleted (*), numbered (#) and indented (:) lists and splits them
   * into items with a nesting depth.
   * @param string Wikitext; the matched lists are passed through Carpenter::tokenize()
   * @return array List of array('type' => unordered|ordered|indent,
   *               'items' => list of array('depth' => int, 'text' => string))
   */
  public function multilist(&$text)
  {
    // Match entire lists
    $regex = '/^
                ([:#\*])+ # Initial list delimiter
                [ ]*
                .+?
                (?:
                  \r?\n
                  (?:\\1|[ ]{2,})
                  [ ]*
                  .+?)*
                $/mx';
    
    if ( !preg_match_all($regex, $text, $lists) )
      return array();
    
    $types = array(
        '*' => 'unordered',
        '#' => 'ordered',
        ':' => 'indent'
      );
    
    $pieces = array();
    foreach ( $lists[0] as $i => $list )
    {
      // A repeated capture group keeps its last iteration, so this is the final
      // delimiter character of the list's first line.
      $token = $lists[1][$i];
      $piece = array(
          'type' => $types[$token],
          'items' => array()
        );
      
      // convert windows newlines to unix
      $list = str_replace("\r\n", "\n", $list);
      $items_pre = explode("\n", $list);
      $items = array();
      // first pass, go through and combine items that are newlined
      foreach ( $items_pre as $item )
      {
        // The first line always opens an item, even when its leading character
        // differs from $token (mixed prefix such as "*#"); otherwise there would
        // be no previous item to append to.
        if ( empty($items) || substr($item, 0, 1) == $token )
        {
          $items[] = $item;
        }
        else
        {
          // it's a continuation of the previous LI: append this line to it
          $items[ count($items) - 1 ] .= "\n" . trim($item);
        }
      }
      
      // second pass, separate items and tokens
      unset($items_pre);
      foreach ( $items as $item )
      {
        // The depth is the length of the leading run of delimiter characters.
        // Measured with strspn() so "*item" (no space after the delimiter, which
        // the regex permits) does not swallow the first word into the prefix.
        $depth = strspn($item, ':#*');
        $piece['items'][] = array(
            // depth starts at 1
            'depth' => $depth,
            'text'  => trim(substr($item, $depth))
          );
      }
      
      $pieces[] = $piece;
    }
    
    $text = Carpenter::tokenize($text, $lists[0]);
    
    return $pieces;
  }
  
  /**
   * Finds runs of consecutive lines that should become paragraphs, i.e. that do
   * not start with a blank line or with one of the listed block-level tags.
   * @param string Wikitext; the matched runs are passed through Carpenter::tokenize()
   * @return array The matched paragraph strings
   */
  public function paragraph(&$text)
  {
    // This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags
    // to prevent the paragraph parser from interfering with pretty HTML generated elsewhere.
    RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
    
    // The trick with paragraphs is to not turn things into them when a block level element already wraps the block of text.
    // First we need a list of block level elements (http://htmlhelp.com/reference/html40/block.html)
    $blocklevel = 'address|blockquote|center|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|ol|p|pre|table|ul';
    
    $regex = "/^(
                (?:(?!(?:\\n|[ ]*<(?:{$blocklevel}))))    # condition for starting paragraph: not a newline character or block level element
                .+?                                       # body text
                (?:
                  \\n                                     # additional lines in the para
                  (?:(?!(?:\\n|[ ]*<(?:{$blocklevel}))))  # make sure of only one newline in a row, and no block level elements
                  .*?
                )*
              )$
              /mx";
    
    if ( !preg_match_all($regex, $text, $matches) )
    {
      // Nothing to wrap, but the bypass sections were already stripped above:
      // restore them so the caller does not get the text back in stripped form.
      RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);
      return array();
    }
    
    // restore stripped
    RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);
    
    // tokenize
    $text = Carpenter::tokenize($text, $matches[0]);
    
    return $matches[0];
  }
}
|
190 |
|
/**
 * Runs the wikitext through both image-tag stages of RenderMan, in order.
 * @param string Text to process
 * @return string Result of the second stage
 */
function parser_mediawiki_xhtml_image($text)
{
  // $image_tags is not set here; it is handed to stage one and then to stage two
  // (presumably stage one fills it through a by-reference parameter - confirm).
  $after_stage1 = RenderMan::process_image_tags($text, $image_tags);
  return RenderMan::process_imgtags_stage2($after_stage1, $image_tags);
}
|
197 |
|
/**
 * Passes the text to process_tables() and returns whatever that gives back.
 * @param string Text to process
 * @return mixed Return value of process_tables()
 */
function parser_mediawiki_xhtml_tables($text)
{
  $with_tables = process_tables($text);
  return $with_tables;
}
|
202 |