1
|
1 |
<?php
|
|
2 |
|
|
3 |
/*
|
|
4 |
* Enano - an open-source CMS capable of wiki functions, Drupal-like sidebar blocks, and everything in between
|
|
5 |
* Version 1.0 (Banshee)
|
|
6 |
* Copyright (C) 2006-2007 Dan Fuhry
|
|
7 |
* search.php - algorithm used to search pages
|
|
8 |
*
|
|
9 |
* This program is Free Software; you can redistribute and/or modify it under the terms of the GNU General Public License
|
|
10 |
* as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
|
|
11 |
*
|
|
12 |
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
|
|
13 |
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for details.
|
|
14 |
*/
|
|
15 |
|
|
16 |
/**
|
|
17 |
* Implementation of array_merge() that preserves key names. $arr2 takes precedence over $arr1.
|
|
18 |
* @param array $arr1
|
|
19 |
* @param array $arr2
|
|
20 |
* @return array
|
|
21 |
*/
|
|
22 |
|
|
23 |
function enano_safe_array_merge($arr1, $arr2)
|
|
24 |
{
|
|
25 |
$arr3 = $arr1;
|
|
26 |
foreach($arr2 as $k => $v)
|
|
27 |
{
|
|
28 |
$arr3[$k] = $v;
|
|
29 |
}
|
|
30 |
return $arr3;
|
|
31 |
}
|
|
32 |
|
|
33 |
/**
|
|
34 |
* Algorithm to actually do the searching. This system usually works pretty fast (tested and developed on a site with 22 pages) but one
|
|
35 |
* caveat of this algorithm is that it has to load the entire index into memory. It also requires manual parsing of the search query
|
|
36 |
* which can be quite CPU-intensive. On the flip side this algorithm is extremely flexible and can be adapted for other uses very easily.
|
|
37 |
*
|
|
38 |
* Most of the time, this system is disabled. It is only used when MySQL can't or won't allow FULLTEXT indices.
|
|
39 |
*
|
|
40 |
* @package Enano
|
|
41 |
* @subpackage Page management frontend
|
|
42 |
* @license GNU General Public License http://www.enanocms.org/Special:GNU_General_Public_License
|
|
43 |
*/
|
|
44 |
|
|
45 |
class Searcher
|
|
46 |
{
|
|
47 |
|
|
48 |
var $results;
|
|
49 |
var $index;
|
|
50 |
var $warnings;
|
|
51 |
var $match_case = false;
|
|
52 |
|
|
53 |
function __construct()
|
|
54 |
{
|
|
55 |
$this->warnings = Array();
|
|
56 |
}
|
|
57 |
|
|
58 |
function Searcher()
|
|
59 |
{
|
|
60 |
$this->__construct();
|
|
61 |
}
|
|
62 |
|
|
63 |
function warn($t)
|
|
64 |
{
|
|
65 |
if(!in_array($t, $this->warnings)) $this->warnings[] = $t;
|
|
66 |
}
|
|
67 |
|
|
68 |
function convertCase($text)
|
|
69 |
{
|
|
70 |
return ( $this->match_case ) ? $text : strtolower($text);
|
|
71 |
}
|
|
72 |
|
|
73 |
function buildIndex($texts)
|
|
74 |
{
|
|
75 |
$this->index = Array();
|
|
76 |
|
|
77 |
foreach($texts as $i => $l)
|
|
78 |
{
|
|
79 |
$seed = md5(microtime(true) . mt_rand());
|
|
80 |
$texts[$i] = str_replace("'", 'xxxApoS'.$seed.'xxx', $texts[$i]);
|
|
81 |
$texts[$i] = preg_replace('#([\W_]+)#i', ' ', $texts[$i]);
|
|
82 |
$texts[$i] = preg_replace('#([ ]+?)#', ' ', $texts[$i]);
|
|
83 |
$texts[$i] = preg_replace('#([\']*){2,}#s', '', $texts[$i]);
|
|
84 |
$texts[$i] = str_replace('xxxApoS'.$seed.'xxx', "'", $texts[$i]);
|
|
85 |
$l = $texts[$i];
|
|
86 |
$words = Array();
|
|
87 |
$good_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\' ';
|
|
88 |
$good_chars = enano_str_split($good_chars, 1);
|
|
89 |
$letters = enano_str_split($l, 1);
|
|
90 |
foreach($letters as $x => $t)
|
|
91 |
{
|
|
92 |
if(!in_array($t, $good_chars))
|
|
93 |
unset($letters[$x]);
|
|
94 |
}
|
|
95 |
$letters = implode('', $letters);
|
|
96 |
$words = explode(' ', $letters);
|
|
97 |
foreach($words as $c => $w)
|
|
98 |
{
|
|
99 |
if(strlen($w) < 4)
|
|
100 |
unset($words[$c]);
|
|
101 |
else
|
|
102 |
$words[$c] = $w;
|
|
103 |
}
|
|
104 |
$words = array_values($words);
|
|
105 |
foreach($words as $c => $w)
|
|
106 |
{
|
|
107 |
if(isset($this->index[$w]))
|
|
108 |
{
|
|
109 |
if(!in_array($i, $this->index[$w]))
|
|
110 |
$this->index[$w][] = $i;
|
|
111 |
}
|
|
112 |
else
|
|
113 |
{
|
|
114 |
$this->index[$w] = Array();
|
|
115 |
$this->index[$w][] = $i;
|
|
116 |
}
|
|
117 |
}
|
|
118 |
}
|
|
119 |
foreach($this->index as $k => $v)
|
|
120 |
{
|
|
121 |
$this->index[$k] = implode(',', $this->index[$k]);
|
|
122 |
}
|
|
123 |
}
|
|
124 |
|
|
125 |
function search($query, $texts)
|
|
126 |
{
|
|
127 |
|
|
128 |
// OK, let's establish some basics here. Here is the procedure for performing the search:
|
|
129 |
// * search for items that matches all the terms in the correct order.
|
|
130 |
// * search for items that match in any order
|
|
131 |
// * eliminate one term and do the loop all over
|
|
132 |
|
|
133 |
$this->results = Array();
|
|
134 |
$query = $this->parseQuery($query);
|
|
135 |
$querybak = $query;
|
|
136 |
for($i = sizeof($query['any'])-1; $i >= 0; $i--)
|
|
137 |
{
|
|
138 |
$res = $this->performCoreSearch($query, $texts, true);
|
|
139 |
$this->results = enano_safe_array_merge($this->results, $res);
|
|
140 |
$res = $this->performCoreSearch($query, $texts, false);
|
|
141 |
$this->results = enano_safe_array_merge($this->results, $res);
|
|
142 |
unset($query['any'][$i]);
|
|
143 |
}
|
|
144 |
|
|
145 |
// Last resort - search for any of the terms instead of all of 'em
|
|
146 |
$res = $this->performCoreSearch($querybak, $texts, false, true);
|
|
147 |
$this->results = enano_safe_array_merge($this->results, $res);
|
|
148 |
|
|
149 |
$this->highlightResults($querybak);
|
|
150 |
}
|
|
151 |
|
|
152 |
// $texts should be a textual MySQL query!
|
|
153 |
// @todo document
|
|
154 |
function searchMySQL($query, $texts)
|
|
155 |
{
|
|
156 |
global $db;
|
|
157 |
// OK, let's establish some basics here. Here is the procedure for performing the search:
|
|
158 |
// * search for items that matches all the terms in the correct order.
|
|
159 |
// * search for items that match in any order
|
|
160 |
// * eliminate one term and do the loop all over
|
|
161 |
|
|
162 |
$this->results = Array();
|
|
163 |
$query = $this->parseQuery($query);
|
|
164 |
$querytmp = $query;
|
|
165 |
$querybak = $query;
|
|
166 |
for($i = sizeof($querytmp['any'])-1; $i >= 0; $i--)
|
|
167 |
{
|
|
168 |
$res = $this->performCoreSearchMySQL($querytmp, $texts, true);
|
|
169 |
$this->results = enano_safe_array_merge($this->results, $res);
|
|
170 |
$res = $this->performCoreSearchMySQL($querytmp, $texts, false);
|
|
171 |
$this->results = enano_safe_array_merge($this->results, $res);
|
|
172 |
unset($querytmp['any'][$i]);
|
|
173 |
}
|
|
174 |
|
|
175 |
// Last resort - search for any of the terms instead of all of 'em
|
|
176 |
$res = $this->performCoreSearchMySQL($querybak, $texts, false, true);
|
|
177 |
$this->results = enano_safe_array_merge($this->results, $res);
|
|
178 |
|
|
179 |
$this->highlightResults($querybak);
|
|
180 |
}
|
|
181 |
|
|
182 |
/**
|
|
183 |
* This method assumes that $query is already parsed and $texts is an (associative) array of possible results
|
|
184 |
* @param array $query A search query parsed with Searcher::parseQuery()
|
|
185 |
* @param array $texts The list of possible results
|
|
186 |
* @param bool $exact_order If true, only matches results with the terms in the same order as the terms in the query
|
|
187 |
* @return array An associative array of results
|
|
188 |
* @access private
|
|
189 |
*/
|
|
190 |
function performCoreSearch($query, $texts, $exact_order = false, $any = false)
|
|
191 |
{
|
|
192 |
$textkeys = array_keys($texts);
|
|
193 |
$results = Array();
|
|
194 |
if($exact_order)
|
|
195 |
{
|
|
196 |
$query = $this->concatQueryTerms($query);
|
|
197 |
}
|
|
198 |
$query['trm'] = array_merge($query['any'], $query['req']);
|
|
199 |
# Find all remotely possible results first
|
|
200 |
// Single-word terms
|
|
201 |
foreach($this->index as $term => $keys)
|
|
202 |
{
|
|
203 |
foreach($query['trm'] as $userterm)
|
|
204 |
{
|
|
205 |
if($this->convertCase($userterm) == $this->convertCase($term))
|
|
206 |
{
|
|
207 |
$k = explode(',', $keys);
|
|
208 |
foreach($k as $idxkey)
|
|
209 |
{
|
|
210 |
if(isset($texts[$idxkey]))
|
|
211 |
{
|
|
212 |
$results[$idxkey] = $texts[$idxkey];
|
|
213 |
}
|
|
214 |
else
|
|
215 |
{
|
|
216 |
if(preg_match('#^([0-9]+)$#', $idxkey))
|
|
217 |
{
|
|
218 |
$idxkey = intval($idxkey);
|
|
219 |
if(isset($texts[$idxkey])) $results[$idxkey] = $texts[$idxkey];
|
|
220 |
}
|
|
221 |
}
|
|
222 |
}
|
|
223 |
}
|
|
224 |
}
|
|
225 |
}
|
|
226 |
// Quoted terms
|
|
227 |
foreach($query['trm'] as $userterm)
|
|
228 |
{
|
|
229 |
if(!preg_match('/[\s"\'~`!@#\$%\^&\*\(\)\{\}:;<>,.\/\?_-]/', $userterm)) continue;
|
|
230 |
foreach($texts as $k => $t)
|
|
231 |
{
|
|
232 |
if(strstr($this->convertCase($t), $this->convertCase($userterm)))
|
|
233 |
{
|
|
234 |
// We have a match!
|
|
235 |
if(!isset($results[$k])) $results[$k] = $t;
|
|
236 |
}
|
|
237 |
}
|
|
238 |
}
|
|
239 |
// Remove excluded terms
|
|
240 |
foreach($results as $k => $r)
|
|
241 |
{
|
|
242 |
foreach($query['not'] as $not)
|
|
243 |
{
|
|
244 |
if(strstr($this->convertCase($r), $this->convertCase($not))) unset($results[$k]);
|
|
245 |
}
|
|
246 |
}
|
|
247 |
if(!$any)
|
|
248 |
{
|
|
249 |
// Remove results not containing all terms
|
|
250 |
foreach($results as $k => $r)
|
|
251 |
{
|
|
252 |
foreach($query['any'] as $term)
|
|
253 |
{
|
|
254 |
if(!strstr($this->convertCase($r), $this->convertCase($term))) unset($results[$k]);
|
|
255 |
}
|
|
256 |
}
|
|
257 |
}
|
|
258 |
// Remove results not containing all required terms
|
|
259 |
foreach($results as $k => $r)
|
|
260 |
{
|
|
261 |
foreach($query['req'] as $term)
|
|
262 |
{
|
|
263 |
if(!strstr($this->convertCase($r), $this->convertCase($term))) unset($results[$k]);
|
|
264 |
}
|
|
265 |
}
|
|
266 |
return $results;
|
|
267 |
}
|
|
268 |
|
|
269 |
/**
|
|
270 |
* This is the same as performCoreSearch, but $texts should be a MySQL result resource. This can save tremendous amounts of memory on large sites.
|
|
271 |
* @param array $query A search query parsed with Searcher::parseQuery()
|
|
272 |
* @param string $texts A text MySQL query that selects the text as the first column and the index key as the second column
|
|
273 |
* @param bool $exact_order If true, only matches results with the terms in the same order as the terms in the query
|
|
274 |
* @return array An associative array of results
|
|
275 |
* @access private
|
|
276 |
*/
|
|
277 |
function performCoreSearchMySQL($query, $texts, $exact_order = false, $any = false)
|
|
278 |
{
|
|
279 |
global $db;
|
|
280 |
$results = Array();
|
|
281 |
if($exact_order)
|
|
282 |
{
|
|
283 |
$query = $this->concatQueryTerms($query);
|
|
284 |
}
|
|
285 |
$query['trm'] = array_merge($query['any'], $query['req']);
|
|
286 |
# Find all remotely possible results first
|
|
287 |
$texts = $db->sql_query($texts);
|
|
288 |
if ( !$texts )
|
|
289 |
$db->_die('The error is in the search engine.');
|
|
290 |
if ( $r = $db->fetchrow_num($texts) )
|
|
291 |
{
|
|
292 |
do
|
|
293 |
{
|
|
294 |
foreach($this->index as $term => $keys)
|
|
295 |
{
|
|
296 |
foreach($query['trm'] as $userterm)
|
|
297 |
{
|
|
298 |
if($this->convertCase($userterm) == $this->convertCase($term))
|
|
299 |
{
|
|
300 |
$k = explode(',', $keys);
|
|
301 |
foreach($k as $idxkey)
|
|
302 |
{
|
|
303 |
$row[0] = $r[0];
|
|
304 |
$row[1] = $r[1];
|
|
305 |
if(!isset($row[1]))
|
|
306 |
{
|
|
307 |
echo('PHP PARSER BUG: $row[1] is set but not set... includes/search.php:'.__LINE__);
|
|
308 |
$GLOBALS['template']->footer();
|
|
309 |
exit;
|
|
310 |
}
|
|
311 |
if($row[1] == $idxkey)
|
|
312 |
$results[$idxkey] = $row[0];
|
|
313 |
else
|
|
314 |
{
|
|
315 |
if(preg_match('#^([0-9]+)$#', $idxkey))
|
|
316 |
{
|
|
317 |
$idxkey = intval($idxkey);
|
|
318 |
if($row[1] == $idxkey) $results[$idxkey] = $row[0];
|
|
319 |
}
|
|
320 |
}
|
|
321 |
}
|
|
322 |
}
|
|
323 |
}
|
|
324 |
}
|
|
325 |
// Quoted terms
|
|
326 |
foreach($query['trm'] as $userterm)
|
|
327 |
{
|
|
328 |
if(!preg_match('/[\s"\'~`!@#\$%\^&\*\(\)\{\}:;<>,.\/\?_-]/', $userterm)) continue;
|
|
329 |
if(strstr($this->convertCase($r[0]), $this->convertCase($userterm)))
|
|
330 |
{
|
|
331 |
// We have a match!
|
|
332 |
if(!isset($results[$r[1]])) $results[$r[1]] = $r[0];
|
|
333 |
}
|
|
334 |
}
|
|
335 |
} while( $r = $db->fetchrow_num($texts) );
|
|
336 |
}
|
|
337 |
// Remove excluded terms
|
|
338 |
foreach($results as $k => $r)
|
|
339 |
{
|
|
340 |
foreach($query['not'] as $not)
|
|
341 |
{
|
|
342 |
if(strstr($this->convertCase($r), $this->convertCase($not))) unset($results[$k]);
|
|
343 |
}
|
|
344 |
}
|
|
345 |
if(!$any)
|
|
346 |
{
|
|
347 |
// Remove results not containing all terms
|
|
348 |
foreach($results as $k => $r)
|
|
349 |
{
|
|
350 |
foreach($query['any'] as $term)
|
|
351 |
{
|
|
352 |
if(!strstr($this->convertCase($r), $this->convertCase($term))) unset($results[$k]);
|
|
353 |
}
|
|
354 |
}
|
|
355 |
}
|
|
356 |
// Remove results not containing all terms
|
|
357 |
foreach($results as $k => $r)
|
|
358 |
{
|
|
359 |
foreach($query['req'] as $term)
|
|
360 |
{
|
|
361 |
if(!strstr($this->convertCase($r), $this->convertCase($term))) unset($results[$k]);
|
|
362 |
}
|
|
363 |
}
|
|
364 |
return $results;
|
|
365 |
}
|
|
366 |
|
|
367 |
function concatQueryTerms($query)
|
|
368 |
{
|
|
369 |
$tmp = implode(' ', $query['any']);
|
|
370 |
unset($query['any']);
|
|
371 |
$query['any'] = Array(0 => $tmp);
|
|
372 |
return $query;
|
|
373 |
}
|
|
374 |
|
|
375 |
/**
|
|
376 |
* Builds a basic assoc array with a more organized version of the query
|
|
377 |
*/
|
|
378 |
|
|
379 |
function parseQuery($query)
|
|
380 |
{
|
|
381 |
$ret = array(
|
|
382 |
'any' => array(),
|
|
383 |
'req' => array(),
|
|
384 |
'not' => array()
|
|
385 |
);
|
|
386 |
$terms = array();
|
|
387 |
$in_quote = false;
|
|
388 |
$start_term = 0;
|
|
389 |
$just_finished = false;
|
|
390 |
for ( $i = 0; $i < strlen($query); $i++ )
|
|
391 |
{
|
|
392 |
$chr = $query{$i};
|
|
393 |
$prev = ( $i > 0 ) ? $query{ $i - 1 } : '';
|
|
394 |
$next = ( ( $i + 1 ) < strlen($query) ) ? $query{ $i + 1 } : '';
|
|
395 |
|
|
396 |
if ( ( $chr == ' ' && !$in_quote ) || ( $i + 1 == strlen ( $query ) ) )
|
|
397 |
{
|
|
398 |
$len = ( $next == '' ) ? $i + 1 : $i - $start_term;
|
|
399 |
$word = substr ( $query, $start_term, $len );
|
|
400 |
$terms[] = $word;
|
|
401 |
$start_term = $i + 1;
|
|
402 |
}
|
|
403 |
|
|
404 |
elseif ( $chr == '"' && $in_quote && $prev != '\\' )
|
|
405 |
{
|
|
406 |
$word = substr ( $query, $start_term, $i - $start_term + 1 );
|
|
407 |
$start_pos = ( $next == ' ' ) ? $i + 2 : $i + 1;
|
|
408 |
$in_quote = false;
|
|
409 |
}
|
|
410 |
|
|
411 |
elseif ( $chr == '"' && !$in_quote )
|
|
412 |
{
|
|
413 |
$in_quote = true;
|
|
414 |
$start_pos = $i;
|
|
415 |
}
|
|
416 |
|
|
417 |
}
|
|
418 |
|
|
419 |
$ticker = 0;
|
|
420 |
|
|
421 |
foreach ( $terms as $element => $__unused )
|
|
422 |
{
|
|
423 |
$atom =& $terms[$element];
|
|
424 |
|
|
425 |
$ticker++;
|
|
426 |
|
|
427 |
if ( $ticker == 20 )
|
|
428 |
{
|
|
429 |
$this->warn('Some of your search terms were excluded because searches are limited to 20 terms to prevent excessive server load.');
|
|
430 |
break;
|
|
431 |
}
|
|
432 |
|
|
433 |
if ( substr ( $atom, 0, 2 ) == '+"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' )
|
|
434 |
{
|
|
435 |
$word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) );
|
|
436 |
if ( strlen ( $word ) < 4 )
|
|
437 |
{
|
|
438 |
$this->warn('One or more of your search terms was excluded because terms must be at least 4 characters in length.');
|
|
439 |
$ticker--;
|
|
440 |
continue;
|
|
441 |
}
|
|
442 |
if(in_array($word, $ret['req']))
|
|
443 |
{
|
|
444 |
$this->warn('One or more of your search terms was excluded because duplicate terms were encountered.');
|
|
445 |
$ticker--;
|
|
446 |
continue;
|
|
447 |
}
|
|
448 |
$ret['req'][] = $word;
|
|
449 |
}
|
|
450 |
elseif ( substr ( $atom, 0, 2 ) == '-"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' )
|
|
451 |
{
|
|
452 |
$word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) );
|
|
453 |
if ( strlen ( $word ) < 4 )
|
|
454 |
{
|
|
455 |
$this->warn('One or more of your search terms was excluded because terms must be at least 4 characters in length.');
|
|
456 |
$ticker--;
|
|
457 |
continue;
|
|
458 |
}
|
|
459 |
if(in_array($word, $ret['not']))
|
|
460 |
{
|
|
461 |
$this->warn('One or more of your search terms was excluded because duplicate terms were encountered.');
|
|
462 |
$ticker--;
|
|
463 |
continue;
|
|
464 |
}
|
|
465 |
$ret['not'][] = $word;
|
|
466 |
}
|
|
467 |
elseif ( substr ( $atom, 0, 1 ) == '+' )
|
|
468 |
{
|
|
469 |
$word = substr ( $atom, 1 );
|
|
470 |
if ( strlen ( $word ) < 4 )
|
|
471 |
{
|
|
472 |
$this->warn('One or more of your search terms was excluded because terms must be at least 4 characters in length.');
|
|
473 |
$ticker--;
|
|
474 |
continue;
|
|
475 |
}
|
|
476 |
if(in_array($word, $ret['req']))
|
|
477 |
{
|
|
478 |
$this->warn('One or more of your search terms was excluded because duplicate terms were encountered.');
|
|
479 |
$ticker--;
|
|
480 |
continue;
|
|
481 |
}
|
|
482 |
$ret['req'][] = $word;
|
|
483 |
}
|
|
484 |
elseif ( substr ( $atom, 0, 1 ) == '-' )
|
|
485 |
{
|
|
486 |
$word = substr ( $atom, 1 );
|
|
487 |
if ( strlen ( $word ) < 4 )
|
|
488 |
{
|
|
489 |
$this->warn('One or more of your search terms was excluded because terms must be at least 4 characters in length.');
|
|
490 |
$ticker--;
|
|
491 |
continue;
|
|
492 |
}
|
|
493 |
if(in_array($word, $ret['not']))
|
|
494 |
{
|
|
495 |
$this->warn('One or more of your search terms was excluded because duplicate terms were encountered.');
|
|
496 |
$ticker--;
|
|
497 |
continue;
|
|
498 |
}
|
|
499 |
$ret['not'][] = $word;
|
|
500 |
}
|
|
501 |
elseif ( substr ( $atom, 0, 1 ) == '"' && substr ( $atom, ( strlen($atom) - 1 ), 1 ) == '"' )
|
|
502 |
{
|
|
503 |
$word = substr ( $atom, 1, ( strlen ( $atom ) - 2 ) );
|
|
504 |
if ( strlen ( $word ) < 4 )
|
|
505 |
{
|
|
506 |
$this->warn('One or more of your search terms was excluded because terms must be at least 4 characters in length.');
|
|
507 |
$ticker--;
|
|
508 |
continue;
|
|
509 |
}
|
|
510 |
if(in_array($word, $ret['any']))
|
|
511 |
{
|
|
512 |
$this->warn('One or more of your search terms was excluded because duplicate terms were encountered.');
|
|
513 |
$ticker--;
|
|
514 |
continue;
|
|
515 |
}
|
|
516 |
$ret['any'][] = $word;
|
|
517 |
}
|
|
518 |
else
|
|
519 |
{
|
|
520 |
$word = $atom;
|
|
521 |
if ( strlen ( $word ) < 4 )
|
|
522 |
{
|
|
523 |
$this->warn('One or more of your search terms was excluded because terms must be at least 4 characters in length.');
|
|
524 |
$ticker--;
|
|
525 |
continue;
|
|
526 |
}
|
|
527 |
if(in_array($word, $ret['any']))
|
|
528 |
{
|
|
529 |
$this->warn('One or more of your search terms was excluded because duplicate terms were encountered.');
|
|
530 |
$ticker--;
|
|
531 |
continue;
|
|
532 |
}
|
|
533 |
$ret['any'][] = $word;
|
|
534 |
}
|
|
535 |
}
|
|
536 |
return $ret;
|
|
537 |
}
|
|
538 |
|
|
539 |
function highlightResults($query, $starttag = '<b>', $endtag = '</b>')
|
|
540 |
{
|
|
541 |
$query['trm'] = array_merge($query['any'], $query['req']);
|
|
542 |
//die('<pre>'.print_r($query, true).'</pre>');
|
|
543 |
foreach($query['trm'] as $q)
|
|
544 |
{
|
|
545 |
foreach($this->results as $k => $r)
|
|
546 |
{
|
|
547 |
$startplace = 0;
|
|
548 |
//$this->results[$k] = htmlspecialchars($this->results[$k]);
|
|
549 |
for($i = 0; $i < strlen($r); $i++)
|
|
550 |
{
|
|
551 |
$word = substr($r, $i, strlen($q));
|
|
552 |
if($this->convertCase($word) == $this->convertCase($q))
|
|
553 |
{
|
|
554 |
$word = $starttag . $word . $endtag;
|
|
555 |
$this->results[$k] = substr($r, 0, $i) . $word . substr($r, $i + strlen($q), strlen($r)+999999);
|
|
556 |
$startplace = $i - 75;
|
|
557 |
if($startplace < 0) $startplace = 0;
|
|
558 |
$this->results[$k] = '...'.trim(substr($this->results[$k], $startplace, strlen($word) + 150)).'...';
|
|
559 |
continue 2;
|
|
560 |
}
|
|
561 |
}
|
|
562 |
}
|
|
563 |
}
|
|
564 |
}
|
|
565 |
|
|
566 |
}
|
|
567 |
|
|
568 |
/**
|
|
569 |
* Developer-friendly way to do searches. :-) Uses the MySQL FULLTEXT index type.
|
|
570 |
* @package Enano
|
|
571 |
* @subpackage Search
|
|
572 |
*/
|
|
573 |
|
|
574 |
class MySQL_Fulltext_Search {
|
|
575 |
|
|
576 |
/**
|
|
577 |
* Performs a search.
|
|
578 |
* @param string The search query
|
|
579 |
* @return resource MySQL result resource - this is an UNBUFFERED query.
|
|
580 |
*/
|
|
581 |
|
|
582 |
function search($query)
|
|
583 |
{
|
|
584 |
global $db, $session, $paths, $template, $plugins; // Common objects
|
|
585 |
|
|
586 |
$fulltext_col = 'MATCH(t.page_id,t.namespace,p.name,t.page_text) AGAINST (\'' . $db->escape($query) . '\' IN BOOLEAN MODE)';
|
|
587 |
$sql = "SELECT t.page_text,CONCAT('ns=',t.namespace,';pid=',t.page_id) AS page_identifier, $fulltext_col AS score, CHAR_LENGTH(t.page_text) AS length FROM ".table_prefix."page_text AS t
|
|
588 |
LEFT JOIN ".table_prefix."pages AS p
|
|
589 |
ON ( p.urlname=t.page_id AND p.namespace=t.namespace)
|
|
590 |
WHERE $fulltext_col > 0
|
|
591 |
AND p.visible=1
|
|
592 |
ORDER BY score DESC;";
|
|
593 |
$q = $db->sql_unbuffered_query($sql);
|
|
594 |
if ( !$q )
|
|
595 |
$db->_die();
|
|
596 |
|
|
597 |
return $q;
|
|
598 |
}
|
|
599 |
|
|
600 |
function highlight_result($query, $result)
|
|
601 |
{
|
|
602 |
global $db, $session, $paths, $template, $plugins; // Common objects
|
|
603 |
$search = new Searcher();
|
|
604 |
$parsed_query = $search->parseQuery($query);
|
|
605 |
return $this->highlight_result_inner($query, $result);
|
|
606 |
}
|
|
607 |
|
|
608 |
function highlight_result_inner($query, $fulltext, $starttag = '<b>', $endtag = '</b>')
|
|
609 |
{
|
|
610 |
$result = false;
|
|
611 |
$query['trm'] = array_merge($query['any'], $query['req']);
|
|
612 |
//die('<pre>'.print_r($query, true).'</pre>');
|
|
613 |
foreach($query['trm'] as $q)
|
|
614 |
{
|
|
615 |
$startplace = 0;
|
|
616 |
//$this->results[$k] = htmlspecialchars($this->results[$k]);
|
|
617 |
for($i = 0; $i < strlen($r); $i++)
|
|
618 |
{
|
|
619 |
$word = substr($r, $i, strlen($q));
|
|
620 |
if($this->convertCase($word) == $this->convertCase($q))
|
|
621 |
{
|
|
622 |
$word = $starttag . $word . $endtag;
|
|
623 |
$result = substr($fulltext, 0, $i) . $word . substr($r, $i + strlen($q), strlen($r)+99999999);
|
|
624 |
$startplace = $i - 75;
|
|
625 |
if($startplace < 0) $startplace = 0;
|
|
626 |
$result = '...'.trim(substr($result, $startplace, strlen($word) + 150)).'...';
|
|
627 |
continue 2;
|
|
628 |
}
|
|
629 |
}
|
|
630 |
}
|
|
631 |
return $result;
|
|
632 |
}
|
|
633 |
|
|
634 |
}
|
|
635 |
|
|
636 |
?>
|