<?php

/*
 * Enano - an open-source CMS capable of wiki functions, Drupal-like sidebar blocks, and everything in between
 * Version 1.1.6 (Caoineag beta 1)
 * Copyright (C) 2006-2008 Dan Fuhry
 *
 * This program is Free Software; you can redistribute and/or modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for details.
 */

/**
 * MediaWiki-style parsing rules.
 *
 * Each public method takes the page text by reference, rewrites it in place and
 * returns an array describing what it extracted (an empty array when the rule
 * only rewrites the text or nothing matched).
 * NOTE(review): how the caller consumes $rules and the returned arrays is not
 * visible in this file - confirm against the Carpenter engine.
 */
class Carpenter_Parse_MediaWiki
{
  /**
   * Inline rules, keyed by rule name. Each value is a PCRE pattern.
   * @var array
   */
  public $rules = array(
      'bold'             => "/'''(.+?)'''/",
      'italic'           => "/''(.+?)''/",
      'underline'        => '/__(.+?)__/',
      'externalwithtext' => '#\[((?:https?|irc|ftp)://.+?) (.+?)\]#',
      'externalnotext'   => '#\[((?:https?|irc|ftp)://.+?)\]#'
    );

  /**
   * Resolves <lang code="xx">...</lang> (or id="xx") blocks: a block whose code
   * equals the active language code is replaced by its inner text, every other
   * block is replaced by an empty string.
   * @param string Page text, modified in place
   * @return array Always empty
   */
  public function lang(&$text)
  {
    global $lang;

    preg_match_all('/<lang (?:code|id)="([a-z0-9_-]+)">([\w\W]+?)<\/lang>/', $text, $langmatch);
    foreach ( $langmatch[0] as $i => $match )
    {
      // keep the inner text only for the active language
      $replacement = ( $langmatch[1][$i] == $lang->lang_code ) ? $langmatch[2][$i] : '';
      $text = str_replace_once($match, $replacement, $text);
    }

    return array();
  }

  /**
   * Repeatedly expands {{template}} inclusions while the text still appears to
   * contain one. At most four expansion passes are run, so templates that
   * include each other cannot loop forever.
   * @param string Page text, modified in place
   * @return array Always empty
   */
  public function templates(&$text)
  {
    $template_regex = "/\{\{(.+)((\n|\|[ ]*([A-z0-9]+)[ ]*=[ ]*(.+))*)\}\}/isU";
    for ( $pass = 1; $pass < 5 && preg_match($template_regex, $text); $pass++ )
    {
      $text = RenderMan::include_templates($text);
    }

    return array();
  }

  /**
   * Extracts "== Heading ==" lines (one to six equals signs on each side,
   * trailing spaces tolerated) and hands the matched lines to
   * Carpenter::tokenize().
   * @param string Page text, modified in place
   * @return array List of array('level' => int, 'text' => string), in document order
   */
  public function heading(&$text)
  {
    if ( !preg_match_all('/^(={1,6}) *(.+?) *\\1 *$/m', $text, $results) )
      return array();

    $headings = array();
    foreach ( $results[1] as $i => $equals )
    {
      $headings[] = array(
          // the level is the number of "=" characters that open the line
          'level' => strlen($equals),
          'text' => $results[2][$i]
        );
    }

    $text = Carpenter::tokenize($text, $results[0]);

    return $headings;
  }

  /**
   * Extracts bulleted (*), numbered (#) and indented (:) lists. Lines that
   * start with two or more spaces are treated as continuations of the
   * previous item.
   *
   * The list type comes from the LAST delimiter character of the first line,
   * because the capture group sits inside a "+" quantifier.
   *
   * @param string Page text, modified in place
   * @return array List of array('type' => string, 'items' => array), where each
   *   item is array('depth' => int, 'text' => string) and depth starts at 1
   */
  public function multilist(&$text)
  {
    // Match entire lists
    $regex = '/^
              ([:#\*])+ # Initial list delimiter
              [ ]*
              .+?
              (?:
                \r?\n
                (?:\\1|[ ]{2,})
                [ ]*
                .+?)*
              $/mx';

    if ( !preg_match_all($regex, $text, $lists) )
      return array();

    $types = array(
        '*' => 'unordered',
        '#' => 'ordered',
        ':' => 'indent'
      );

    $pieces = array();
    foreach ( $lists[0] as $i => $list )
    {
      $token = $lists[1][$i];
      $piece = array(
          'type' => $types[$token],
          'items' => array()
        );

      // first pass: normalize newlines, then fold continuation lines into the
      // item they belong to
      $items = array();
      foreach ( explode("\n", str_replace("\r\n", "\n", $list)) as $line )
      {
        // The first line always opens an item. With a mixed prefix such as "*#"
        // the captured token is "#" while the line starts with "*", so comparing
        // against the token alone would append to a nonexistent previous item.
        if ( empty($items) || substr($line, 0, 1) === $token )
        {
          $items[] = $line;
        }
        else
        {
          $items[ count($items) - 1 ] .= "\n" . trim($line);
        }
      }

      // second pass: separate the delimiter run from the item text
      foreach ( $items as $item )
      {
        // Count the leading delimiter characters instead of splitting on the
        // first space: the regex allows "*item" with no space, which would
        // otherwise swallow the first word into the depth.
        $depth = strspn($item, ':#*');
        $piece['items'][] = array(
            // depth starts at 1
            'depth' => $depth,
            'text' => trim(substr($item, $depth))
          );
      }

      $pieces[] = $piece;
    }

    $text = Carpenter::tokenize($text, $lists[0]);

    return $pieces;
  }

  /**
   * Finds runs of consecutive non-blank lines that do not start with a block
   * level HTML element and hands them to Carpenter::tokenize().
   * @param string Page text, modified in place
   * @return array The matched paragraph strings
   */
  public function paragraph(&$text)
  {
    // This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags
    // to prevent the paragraph parser from interfering with pretty HTML generated elsewhere.
    RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);

    // The trick with paragraphs is to not turn things into them when a block level element already wraps the block of text.
    // First we need a list of block level elements (http://htmlhelp.com/reference/html40/block.html)
    $blocklevel = 'address|blockquote|center|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|ol|p|pre|table|ul';

    $regex = "/^(
                (?:(?!(?:\\n|[ ]*<(?:{$blocklevel})))) # condition for starting paragraph: not a newline character or block level element
                .+? # body text
                (?:
                  \\n # additional lines in the para
                  (?:(?!(?:\\n|[ ]*<(?:{$blocklevel})))) # make sure of only one newline in a row, and no block level elements
                  .*?
                )*
              )$
              /mx";

    $found = preg_match_all($regex, $text, $matches);

    // Restore stripped sections before either return, so that a page with no
    // paragraphs is not left in its stripped state.
    // NOTE(review): presumably tag_strip swaps the tagged regions for placeholders
    // and tag_unstrip reverses that - confirm against RenderMan.
    RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);

    if ( !$found )
      return array();

    // tokenize
    $text = Carpenter::tokenize($text, $matches[0]);

    return $matches[0];
  }
}
|
|
190 |
|
|
191 |
/**
 * Runs the two RenderMan image tag stages over the given text, in order.
 * NOTE(review): presumably process_image_tags() fills its second argument by
 * reference and stage two consumes it - confirm against RenderMan.
 * @param string Text to process
 * @return string Output of the second stage
 */
function parser_mediawiki_xhtml_image($text)
{
  $stage_one = RenderMan::process_image_tags($text, $image_tags);
  return RenderMan::process_imgtags_stage2($stage_one, $image_tags);
}
|
|
197 |
|
|
198 |
/**
 * Thin wrapper that passes the text through process_tables().
 * @param string Text to process
 * @return mixed Whatever process_tables() returns for that text
 */
function parser_mediawiki_xhtml_tables($text)
{
  $rendered = process_tables($text);
  return $rendered;
}
|
|
202 |
|