119 foreach($this->index as $k => $v) |
98 foreach($this->index as $k => $v) |
120 { |
99 { |
121 $this->index[$k] = implode(',', $this->index[$k]); |
100 $this->index[$k] = implode(',', $this->index[$k]); |
122 } |
101 } |
123 } |
102 } |
124 |
103 } |
125 function search($query, $texts) |
104 |
126 { |
105 /** |
127 |
106 * Searches the site for the specified string and returns an array with each value being an array filled with the following: |
128 // OK, let's establish some basics here. Here is the procedure for performing the search: |
107 * page_id: string, self-explanatory |
129 // * search for items that matches all the terms in the correct order. |
108 * namespace: string, self-explanatory |
130 // * search for items that match in any order |
109 * page_length: integer, the length of the full page in bytes |
131 // * eliminate one term and do the loop all over |
110 * page_text: string, the contents of the page (trimmed to ~150 bytes if necessary) |
132 |
111 * score: numerical relevance score, 1-100, rounded to 2 digits and calculated based on which terms were present and which were not |
133 $this->results = Array(); |
112 * @param string Search query |
134 $query = $this->parseQuery($query); |
113 * @param string Will be filled with any warnings encountered whilst parsing the query |
135 $querybak = $query; |
114 * @param bool Case sensitivity - defaults to false |
136 for($i = sizeof($query['any'])-1; $i >= 0; $i--) |
115 * @return array |
137 { |
116 */ |
138 $res = $this->performCoreSearch($query, $texts, true); |
117 |
139 $this->results = enano_safe_array_merge($this->results, $res); |
118 function perform_search($query, &$warnings, $case_sensitive = false) |
140 $res = $this->performCoreSearch($query, $texts, false); |
119 { |
141 $this->results = enano_safe_array_merge($this->results, $res); |
120 global $db, $session, $paths, $template, $plugins; // Common objects |
142 unset($query['any'][$i]); |
121 $warnings = array(); |
143 } |
122 |
144 |
123 $query = parse_search_query($query, $warnings); |
145 // Last resort - search for any of the terms instead of all of 'em |
124 |
146 $res = $this->performCoreSearch($querybak, $texts, false, true); |
125 // Segregate search terms containing spaces |
147 $this->results = enano_safe_array_merge($this->results, $res); |
126 $query_phrase = array( |
148 |
127 'any' => array(), |
149 $this->highlightResults($querybak); |
128 'req' => array() |
150 } |
129 ); |
151 |
130 |
152 // $texts should be a textual MySQL query! |
131 foreach ( $query['any'] as $i => $_ ) |
153 // @todo document |
132 { |
154 function searchMySQL($query, $texts) |
133 $term =& $query['any'][$i]; |
155 { |
134 $term = trim($term); |
156 global $db; |
135 // the indexer only indexes words a-z with apostrophes |
157 // OK, let's establish some basics here. Here is the procedure for performing the search: |
136 if ( preg_match('/[^A-Za-z\']/', $term) ) |
158 // * search for items that matches all the terms in the correct order. |
137 { |
159 // * search for items that match in any order |
138 $query_phrase['any'][] = $term; |
160 // * eliminate one term and do the loop all over |
139 unset($term, $query['any'][$i]); |
161 |
140 } |
162 $this->results = Array(); |
141 } |
163 $query = $this->parseQuery($query); |
142 unset($term); |
164 $querytmp = $query; |
143 $query['any'] = array_values($query['any']); |
165 $querybak = $query; |
144 |
166 for($i = sizeof($querytmp['any'])-1; $i >= 0; $i--) |
145 foreach ( $query['req'] as $i => $_ ) |
167 { |
146 { |
168 $res = $this->performCoreSearchMySQL($querytmp, $texts, true); |
147 $term =& $query['req'][$i]; |
169 $this->results = enano_safe_array_merge($this->results, $res); |
148 $term = trim($term); |
170 $res = $this->performCoreSearchMySQL($querytmp, $texts, false); |
149 if ( preg_match('/[^A-Za-z\']/', $term) ) |
171 $this->results = enano_safe_array_merge($this->results, $res); |
150 { |
172 unset($querytmp['any'][$i]); |
151 $query_phrase['req'][] = $term; |
173 } |
152 unset($term, $query['req'][$i]); |
174 |
153 } |
175 // Last resort - search for any of the terms instead of all of 'em |
154 } |
176 $res = $this->performCoreSearchMySQL($querybak, $texts, false, true); |
155 unset($term); |
177 $this->results = enano_safe_array_merge($this->results, $res); |
156 $query['req'] = array_values($query['req']); |
178 |
157 |
179 $this->highlightResults($querybak); |
158 $results = array(); |
180 } |
159 $scores = array(); |
181 |
160 |
182 /** |
161 // FIXME: Update to use FULLTEXT algo when available. |
183 * This method assumes that $query is already parsed and $texts is an (associative) array of possible results |
162 |
184 * @param array $query A search query parsed with Searcher::parseQuery() |
163 // Build an SQL query to load from the index table |
185 * @param array $texts The list of possible results |
164 if ( count($query['any']) < 1 && count($query['req']) < 1 && count($query_phrase['any']) < 1 && count($query_phrase['req']) < 1 ) |
186 * @param bool $exact_order If true, only matches results with the terms in the same order as the terms in the query |
165 { |
187 * @return array An associative array of results |
166 // This is both because of technical restrictions and devastation that would occur on shared servers/large sites. |
188 * @access private |
167 $warnings[] = 'You need to have at least one keyword in your search query. Searching only for pages not containing a term is not allowed.'; |
189 */ |
168 return array(); |
190 function performCoreSearch($query, $texts, $exact_order = false, $any = false) |
169 } |
191 { |
170 |
192 $textkeys = array_keys($texts); |
171 // |
193 $results = Array(); |
172 // STAGE 1 |
194 if($exact_order) |
173 // Get all possible result pages from the search index. Tally which pages have the most words, and later sort them by boolean relevance |
195 { |
174 // |
196 $query = $this->concatQueryTerms($query); |
175 |
197 } |
176 // Skip this if no indexable words are included |
198 $query['trm'] = array_merge($query['any'], $query['req']); |
177 |
199 # Find all remotely possible results first |
178 if ( count($query['any']) > 0 || count($query['req']) > 0 ) |
200 // Single-word terms |
179 { |
201 foreach($this->index as $term => $keys) |
180 $where_any = array(); |
202 { |
181 foreach ( $query['any'] as $term ) |
203 foreach($query['trm'] as $userterm) |
182 { |
204 { |
183 $term = escape_string_like($term); |
205 if($this->convertCase($userterm) == $this->convertCase($term)) |
184 if ( !$case_sensitive ) |
|
185 $term = strtolower($term); |
|
186 $where_any[] = $term; |
|
187 } |
|
188 foreach ( $query['req'] as $term ) |
|
189 { |
|
190 $term = escape_string_like($term); |
|
191 if ( !$case_sensitive ) |
|
192 $term = strtolower($term); |
|
193 $where_any[] = $term; |
|
194 } |
|
195 |
|
196 $col_word = ( $case_sensitive ) ? 'word' : 'lcase(word)'; |
|
197 $where_any = ( count($where_any) > 0 ) ? '( ' . $col_word . ' = \'' . implode('\' OR ' . $col_word . ' = \'', $where_any) . '\' )' : ''; |
|
198 |
|
199 // generate query |
|
200 // using a GROUP BY here ensures that the same word with a different case isn't counted as 2 words - it's all melted back |
|
201 // into one later in the processing stages |
|
202 $group_by = ( $case_sensitive ) ? '' : ' GROUP BY lcase(word);'; |
|
203 $sql = "SELECT word, page_names FROM " . table_prefix . "search_index WHERE {$where_any}{$group_by}"; |
|
204 if ( !($q = $db->sql_unbuffered_query($sql)) ) |
|
205 $db->_die('Error is in perform_search(), includes/search.php, query 1'); |
|
206 |
|
207 $word_tracking = array(); |
|
208 if ( $row = $db->fetchrow() ) |
|
209 { |
|
210 do |
|
211 { |
|
212 // get page list |
|
213 $pages =& $row['page_names']; |
|
214 $ns_list = '(' . implode('|', array_keys($paths->nslist)) . ')'; |
|
215 if ( strpos($pages, ',') ) |
206 { |
216 { |
207 $k = explode(',', $keys); |
217 // the term occurs in more than one page |
208 foreach($k as $idxkey) |
218 |
|
219 // Find page IDs that contain commas |
|
220 // This should never happen because commas are escaped by sanitize_page_id(). Nevertheless for compatibility with older |
|
221 // databases, and to alleviate the concerns of hackers, we'll accommodate for page IDs with commas here by checking for |
|
222 // IDs that don't match the pattern for stringified page ID + namespace. If it doesn't match, that means it's a continuation |
|
223 // of the previous ID and should be concatenated to the previous entry. |
|
224 $matches = explode(',', $pages); |
|
225 $prev = false; |
|
226 foreach ( $matches as $i => $_ ) |
209 { |
227 { |
210 if(isset($texts[$idxkey])) |
228 $match =& $matches[$i]; |
|
229 if ( !preg_match("/^ns=$ns_list;pid=(.+)$/", $match) && $prev ) |
211 { |
230 { |
212 $results[$idxkey] = $texts[$idxkey]; |
231 $matches[$prev] .= ',' . $match; |
|
232 unset($match, $matches[$i]); |
|
233 continue; |
|
234 } |
|
235 $prev = $i; |
|
236 } |
|
237 unset($match); |
|
238 |
|
239 // Iterate through each of the results, assigning scores based on how many times the page has shown up. |
|
240 // This works because this phase of the search is strongly word-based not page-based. If a page shows up |
|
241 // multiple times while fetching the result rows from the search_index table, it simply means that page |
|
242 // contains more than one of the terms the user searched for. |
|
243 |
|
244 foreach ( $matches as $match ) |
|
245 { |
|
246 if ( isset($scores[$match]) ) |
|
247 { |
|
248 $scores[$match]++; |
213 } |
249 } |
214 else |
250 else |
215 { |
251 { |
216 if(preg_match('#^([0-9]+)$#', $idxkey)) |
252 $scores[$match] = 1; |
217 { |
253 } |
218 $idxkey = intval($idxkey); |
254 if ( isset($word_tracking[$match]) ) |
219 if(isset($texts[$idxkey])) $results[$idxkey] = $texts[$idxkey]; |
255 { |
220 } |
256 $word_tracking[$match][] = $row['word']; |
|
257 } |
|
258 else |
|
259 { |
|
260 $word_tracking[$match] = array($row['word']); |
221 } |
261 } |
222 } |
262 } |
223 } |
263 } |
224 } |
264 else |
225 } |
|
226 // Quoted terms |
|
227 foreach($query['trm'] as $userterm) |
|
228 { |
|
229 if(!preg_match('/[\s"\'~`!@#\$%\^&\*\(\)\{\}:;<>,.\/\?_-]/', $userterm)) continue; |
|
230 foreach($texts as $k => $t) |
|
231 { |
|
232 if(strstr($this->convertCase($t), $this->convertCase($userterm))) |
|
233 { |
265 { |
234 // We have a match! |
266 // the term only occurs in one page |
235 if(!isset($results[$k])) $results[$k] = $t; |
267 if ( isset($scores[$pages]) ) |
|
268 { |
|
269 $scores[$pages]++; |
|
270 } |
|
271 else |
|
272 { |
|
273 $scores[$pages] = 1; |
|
274 } |
|
275 if ( isset($word_tracking[$pages]) ) |
|
276 { |
|
277 $word_tracking[$pages][] = $row['word']; |
|
278 } |
|
279 else |
|
280 { |
|
281 $word_tracking[$pages] = array($row['word']); |
|
282 } |
236 } |
283 } |
237 } |
284 } |
238 } |
285 while ( $row = $db->fetchrow() ); |
239 // Remove excluded terms |
286 } |
240 foreach($results as $k => $r) |
287 $db->free_result(); |
241 { |
288 |
242 foreach($query['not'] as $not) |
289 // |
243 { |
290 // STAGE 2: FIRST ELIMINATION ROUND |
244 if(strstr($this->convertCase($r), $this->convertCase($not))) unset($results[$k]); |
291 // Iterate through the list of required terms. If a given page is not found to have the required term, eliminate it |
245 } |
292 // |
246 } |
293 |
247 if(!$any) |
294 foreach ( $query['req'] as $term ) |
248 { |
295 { |
249 // Remove results not containing all terms |
296 foreach ( $word_tracking as $i => $page ) |
250 foreach($results as $k => $r) |
297 { |
251 { |
298 if ( !in_array($term, $page) ) |
252 foreach($query['any'] as $term) |
|
253 { |
299 { |
254 if(!strstr($this->convertCase($r), $this->convertCase($term))) unset($results[$k]); |
300 unset($word_tracking[$i], $scores[$i]); |
255 } |
301 } |
256 } |
302 } |
257 } |
303 } |
258 // Remove results not containing all required terms |
304 } |
259 foreach($results as $k => $r) |
305 |
260 { |
306 // |
261 foreach($query['req'] as $term) |
307 // STAGE 3: PHRASE SEARCHING |
262 { |
308 // Use LIKE to find pages with specified phrases. We can do a super-picky single query without another elimination round because |
263 if(!strstr($this->convertCase($r), $this->convertCase($term))) unset($results[$k]); |
309 // at this stage we can search the full page_text column instead of relying on a word list. |
264 } |
310 // |
265 } |
311 |
266 return $results; |
312 // We can skip this stage if none of these special terms apply |
267 } |
313 |
268 |
314 $text_col = ( $case_sensitive ) ? 'page_text' : 'lcase(page_text)'; |
269 /** |
315 |
270 * This is the same as performCoreSearch, but $texts should be a MySQL result resource. This can save tremendous amounts of memory on large sites. |
316 if ( count($query_phrase['any']) > 0 || count($query_phrase['req']) > 0 ) |
271 * @param array $query A search query parsed with Searcher::parseQuery() |
317 { |
272 * @param string $texts A text MySQL query that selects the text as the first column and the index key as the second column |
318 |
273 * @param bool $exact_order If true, only matches results with the terms in the same order as the terms in the query |
319 $where_any = array(); |
274 * @return array An associative array of results |
320 foreach ( $query_phrase['any'] as $term ) |
275 * @access private |
321 { |
276 */ |
322 $term = escape_string_like($term); |
277 function performCoreSearchMySQL($query, $texts, $exact_order = false, $any = false) |
323 if ( !$case_sensitive ) |
278 { |
324 $term = strtolower($term); |
279 global $db; |
325 $where_any[] = $term; |
280 $results = Array(); |
326 } |
281 if($exact_order) |
327 |
282 { |
328 $where_any = ( count($where_any) > 0 ) ? "( $text_col LIKE '%" . implode("%' OR $text_col LIKE '%", $where_any) . "%' )" : ''; |
283 $query = $this->concatQueryTerms($query); |
329 |
284 } |
330 // Also do required columns, but use AND to ensure that all required terms are included |
285 $query['trm'] = array_merge($query['any'], $query['req']); |
331 $where_req = array(); |
286 # Find all remotely possible results first |
332 foreach ( $query_phrase['req'] as $term ) |
287 $texts = $db->sql_query($texts); |
333 { |
288 if ( !$texts ) |
334 $term = escape_string_like($term); |
289 $db->_die('The error is in the search engine.'); |
335 if ( !$case_sensitive ) |
290 if ( $r = $db->fetchrow_num($texts) ) |
336 $term = strtolower($term); |
|
337 $where_req[] = $term; |
|
338 } |
|
339 $and_clause = ( $where_any != '' ) ? 'AND ' : ''; |
|
340 $where_req = ( count($where_req) > 0 ) ? "{$and_clause}$text_col LIKE '%" . implode("%' AND $text_col LIKE '%", $where_req) . "%'" : ''; |
|
341 |
|
342 $sql = 'SELECT CONCAT("ns=",namespace,";pid=",page_id) AS id FROM ' . table_prefix . "page_text WHERE $where_any $where_req;"; |
|
343 if ( !($q = $db->sql_unbuffered_query($sql)) ) |
|
344 $db->_die('Error is in perform_search(), includes/search.php, query 2. Parsed query dump follows:<pre>(indexable) ' . htmlspecialchars(print_r($query, true)) . '(non-indexable) ' . htmlspecialchars(print_r($query_phrase, true)) . '</pre>'); |
|
345 |
|
346 if ( $row = $db->fetchrow() ) |
291 { |
347 { |
292 do |
348 do |
293 { |
349 { |
294 foreach($this->index as $term => $keys) |
350 $id =& $row['id']; |
|
351 if ( isset($scores[$id]) ) |
295 { |
352 { |
296 foreach($query['trm'] as $userterm) |
353 $scores[$id]++; |
|
354 } |
|
355 else |
|
356 { |
|
357 $scores[$id] = 1; |
|
358 } |
|
359 } |
|
360 while ( $row = $db->fetchrow() ); |
|
361 } |
|
362 $db->free_result(); |
|
363 } |
|
364 |
|
365 // |
|
366 // STAGE 4 - SELECT PAGE TEXT AND ELIMINATE NOTS |
|
367 // At this point, we have a complete list of all the possible pages. Now we want to obtain the page text, and within the same query |
|
368 // eliminate any terms that shouldn't be in there. |
|
369 // |
|
370 |
|
371 // Generate master word list for the highlighter |
|
372 $word_list = array_values(array_merge($query['any'], $query['req'], $query_phrase['any'], $query_phrase['req'])); |
|
373 |
|
374 $text_where = array(); |
|
375 foreach ( $scores as $page_id => $_ ) |
|
376 { |
|
377 $text_where[] = $db->escape($page_id); |
|
378 } |
|
379 $text_where = '( CONCAT("ns=",t.namespace,";pid=",t.page_id) = \'' . implode('\' OR CONCAT("ns=",t.namespace,";pid=",t.page_id) = \'', $text_where) . '\' )'; |
|
380 |
|
381 if ( count($query['not']) > 0 ) |
|
382 $text_where .= ' AND'; |
|
383 |
|
384 $where_not = array(); |
|
385 foreach ( $query['not'] as $term ) |
|
386 { |
|
387 $term = escape_string_like($term); |
|
388 if ( !$case_sensitive ) |
|
389 $term = strtolower($term); |
|
390 $where_not[] = $term; |
|
391 } |
|
392 $where_not = ( count($where_not) > 0 ) ? "$text_col NOT LIKE '%" . implode("%' AND $text_col NOT LIKE '%", $where_not) . "%'" : ''; |
|
393 |
|
394 $sql = 'SELECT CONCAT("ns=",t.namespace,";pid=",t.page_id) AS id, t.page_id, t.namespace, CHAR_LENGTH(t.page_text) AS page_length, t.page_text, p.name AS page_name FROM ' . table_prefix . "page_text AS t |
|
395 LEFT JOIN " . table_prefix . "pages AS p |
|
396 ON ( p.urlname = t.page_id AND p.namespace = t.namespace ) |
|
397 WHERE $text_where $where_not;"; |
|
398 if ( !($q = $db->sql_unbuffered_query($sql)) ) |
|
399 $db->_die('Error is in perform_search(), includes/search.php, query 3'); |
|
400 |
|
401 $page_data = array(); |
|
402 if ( $row = $db->fetchrow() ) |
|
403 { |
|
404 do |
|
405 { |
|
406 $row['page_text'] = htmlspecialchars($row['page_text']); |
|
407 $row['page_name'] = htmlspecialchars($row['page_name']); |
|
408 |
|
409 // Highlight results (this is wonderfully automated) |
|
410 $row['page_text'] = highlight_and_clip_search_result($row['page_text'], $word_list, $case_sensitive); |
|
411 if ( strlen($row['page_text']) > 250 && !preg_match('/^\.\.\.(.+)\.\.\.$/', $row['page_text']) ) |
|
412 { |
|
413 $row['page_text'] = substr($row['page_text'], 0, 150) . '...'; |
|
414 } |
|
415 $row['page_name'] = highlight_search_result($row['page_name'], $word_list, $case_sensitive); |
|
416 |
|
417 $page_data[$row['id']] = $row; |
|
418 } |
|
419 while ( $row = $db->fetchrow() ); |
|
420 } |
|
421 $db->free_result(); |
|
422 |
|
423 // |
|
424 // STAGE 5 - SPECIAL PAGE TITLE SEARCH |
|
425 // Iterate through $paths->pages and check the titles for search terms. Score accordingly. |
|
426 // |
|
427 |
|
428 foreach ( $paths->pages as $page ) |
|
429 { |
|
430 if ( $page['namespace'] != 'Special' ) |
|
431 continue; |
|
432 $idstring = 'ns=' . $page['namespace'] . ';pid=' . $page['urlname_nons']; |
|
433 $any = array_merge($query['any'], $query_phrase['any']); |
|
434 foreach ( $any as $term ) |
|
435 { |
|
436 if ( $case_sensitive ) |
|
437 { |
|
438 if ( strstr($page['name'], $term) || strstr($page['urlname_nons'], $term) ) |
|
439 { |
|
440 ( isset($scores[$idstring]) ) ? $scores[$idstring]++ : $scores[$idstring] = 1; |
|
441 } |
|
442 } |
|
443 else |
|
444 { |
|
445 if ( strstr(strtolower($page['name']), strtolower($term)) || strstr(strtolower($page['urlname_nons']), strtolower($term)) ) |
|
446 { |
|
447 ( isset($scores[$idstring]) ) ? $scores[$idstring]++ : $scores[$idstring] = 1; |
|
448 } |
|
449 } |
|
450 } |
|
451 if ( isset($scores[$idstring]) ) |
|
452 { |
|
453 $page_data[$idstring] = array( |
|
454 'page_name' => $page['name'], |
|
455 'page_text' => '', |
|
456 'page_id' => $page['urlname_nons'], |
|
457 'namespace' => $page['namespace'], |
|
458 'score' => $scores[$idstring], |
|
459 'page_length' => 1, |
|
460 'page_note' => '[Special page]' |
|
461 ); |
|
462 } |
|
463 } |
|
464 |
|
465 // |
|
466 // STAGE 6 - SECOND ELIMINATION ROUND |
|
467 // Iterate through the list of required terms. If a given page is not found to have the required term, eliminate it |
|
468 // |
|
469 |
|
470 $required = array_merge($query['req'], $query_phrase['req']); |
|
471 foreach ( $required as $term ) |
|
472 { |
|
473 foreach ( $page_data as $id => $page ) |
|
474 { |
|
475 if ( ( $page['namespace'] == 'Special' || ( $page['namespace'] != 'Special' && !strstr($page['page_text'], $term) ) ) && !strstr($page['page_id'], $term) && !strstr($page['page_name'], $term) ) |
|
476 { |
|
477 unset($page_data[$id]); |
|
478 } |
|
479 } |
|
480 } |
|
481 |
|
482 // At this point, all of our normal results are in. However, we can also allow plugins to hook into the system and score their own |
|
483 // pages and add text, etc. as necessary. |
|
484 // Plugins are COMPLETELY responsible for using the search terms and handling Boolean logic properly |
|
485 |
|
486 $code = $plugins->setHook('search_global_inner'); |
|
487 foreach ( $code as $cmd ) |
|
488 { |
|
489 eval($cmd); |
|
490 } |
|
491 |
|
492 // a marvelous debugging aid :-) |
|
493 // die('<pre>' . htmlspecialchars(print_r($page_data, true)) . '</pre>'); |
|
494 |
|
495 // |
|
496 // STAGE 7 - HIGHLIGHT, TRIM, AND SCORE RESULTS |
|
497 // We now have the complete results of the search. We need to trim text down to show only portions of the page containing search |
|
498 // terms, highlight any search terms within the page, and sort the final results array in descending order of score. |
|
499 // |
|
500 |
|
501 // Sort scores array |
|
502 arsort($scores); |
|
503 |
|
504 // Divisor for calculating relevance scores |
|
505 $divisor = count($query['any']) + count($query_phrase['any']) + count($query['req']) + count($query_phrase['not']); |
|
506 |
|
507 foreach ( $scores as $page_id => $score ) |
|
508 { |
|
509 if ( !isset($page_data[$page_id]) ) |
|
510 // It's possible that $scores contains a score for a page that was later eliminated because it contained a disallowed term |
|
511 continue; |
|
512 |
|
513 // Make a copy of the datum, then delete the original (it frees up a LOT of RAM) |
|
514 $datum = $page_data[$page_id]; |
|
515 unset($page_data[$page_id]); |
|
516 |
|
517 // This is an internal value used for sorting - it's no longer needed. |
|
518 unset($datum['id']); |
|
519 |
|
520 // Calculate score |
|
521 if ( $score > $divisor ) |
|
522 $score = $divisor; |
|
523 $datum['score'] = round($score / $divisor, 2) * 100; |
|
524 |
|
525 // Store it in our until-now-unused results array |
|
526 $results[] = $datum; |
|
527 } |
|
528 |
|
529 // Our work here is done. :-D |
|
530 return $results; |
|
531 } |
|
532 |
|
/**
 * Parses a search query into an associative array. The resultant array will be filled with the following values, each an array:
 *   any: Search terms that can optionally be present
 *   req: Search terms that must be present (prefixed with "+")
 *   not: Search terms that should not be present (prefixed with "-")
 * Terms are separated by spaces; a term wrapped in double quotes may itself contain spaces. Leading, trailing and repeated
 * spaces are ignored. At most 20 terms are accepted; anything beyond that is dropped with a warning.
 * @param string Search query
 * @param array Will be filled with parser warnings, such as query too short, words too short, etc.
 * @return array
 */

function parse_search_query($query, &$warnings)
{
  $stopwords = get_stopwords();
  $ret = array(
      'any' => array(),
      'req' => array(),
      'not' => array()
    );
  $warnings = array();

  $max_terms = 20;
  $warn_limit = 'Some of your search terms were excluded because searches are limited to 20 terms to prevent excessive server load.';
  $warn_short = 'One or more of your search terms was excluded because either it was less than 2 characters in length or is a common word (a stopword) that is typically found on a large number of pages. Examples of stopwords include "the", "this", "which", "with", etc.';
  $warn_short_phrase = 'One or more of your search terms was excluded because terms must be at least 4 characters in length.';
  $warn_duplicate = 'One or more of your search terms was excluded because duplicate terms were encountered.';

  //
  // PASS 1: split the query on spaces that are not inside double quotes
  //

  $query = (string) $query;
  $length = strlen($query);
  $terms = array();
  $in_quote = false;
  $start_term = 0;
  for ( $i = 0; $i < $length; $i++ )
  {
    $chr = $query[$i];
    $prev = ( $i > 0 ) ? $query[$i - 1] : '';

    if ( $chr == ' ' && !$in_quote )
    {
      // Unquoted space: everything since the last boundary is one term (the space itself is not part of it)
      $terms[] = (string) substr($query, $start_term, $i - $start_term);
      $start_term = $i + 1;
    }
    elseif ( $i + 1 == $length )
    {
      // End of the query: flush whatever is left, even if a quote was never closed
      $terms[] = (string) substr($query, $start_term);
    }
    elseif ( $chr == '"' && $in_quote && $prev != '\\' )
    {
      // A quote preceded by a backslash does not close the phrase
      $in_quote = false;
    }
    elseif ( $chr == '"' && !$in_quote )
    {
      $in_quote = true;
    }
  }

  //
  // PASS 2: classify each term by its prefix and validate it
  //

  $accepted = 0;
  foreach ( $terms as $atom )
  {
    // Empty terms come from leading or repeated spaces; they are not worth a warning
    if ( $atom === '' )
      continue;

    if ( $accepted >= $max_terms )
    {
      $warnings[] = $warn_limit;
      break;
    }

    $atom_len = strlen($atom);
    $ends_with_quote = ( substr($atom, -1) == '"' );
    $min_length = 2;
    $check_stopwords = true;

    if ( substr($atom, 0, 2) == '+"' && $ends_with_quote )
    {
      $list = 'req';
      $word = (string) substr($atom, 2, $atom_len - 3);
    }
    elseif ( substr($atom, 0, 2) == '-"' && $ends_with_quote )
    {
      // Excluded phrases have a stricter length requirement and are not checked against the stopword list
      $list = 'not';
      $word = (string) substr($atom, 2, $atom_len - 3);
      $min_length = 4;
      $check_stopwords = false;
    }
    elseif ( substr($atom, 0, 1) == '+' )
    {
      $list = 'req';
      $word = (string) substr($atom, 1);
    }
    elseif ( substr($atom, 0, 1) == '-' )
    {
      $list = 'not';
      $word = (string) substr($atom, 1);
    }
    elseif ( substr($atom, 0, 1) == '"' && $ends_with_quote )
    {
      $list = 'any';
      $word = (string) substr($atom, 1, $atom_len - 2);
    }
    else
    {
      $list = 'any';
      $word = $atom;
    }

    if ( strlen($word) < $min_length || ( $check_stopwords && in_array($word, $stopwords) ) )
    {
      $warnings[] = ( $check_stopwords ) ? $warn_short : $warn_short_phrase;
      continue;
    }

    // Strict comparison: a loose in_array() would treat numeric strings such as "1e3" and "1000" as duplicates
    if ( in_array($word, $ret[$list], true) )
    {
      $warnings[] = $warn_duplicate;
      continue;
    }

    $ret[$list][] = $word;
    $accepted++;
  }

  return $ret;
}
|
704 |
|
/**
 * Escapes a string for use inside a quoted LIKE pattern in an SQL query.
 * The LIKE metacharacters (backslash, % and _) are neutralised first, then the result is passed through $db->escape() so it
 * can be embedded in a quoted SQL string. Doing it in this order means a backslash typed by the user can never end up
 * escaping the backslash that protects a following % or _.
 * NOTE(review): assumes the database uses backslash as the LIKE escape character (the MySQL default) - confirm if another
 * backend is ever supported.
 * @param string
 * @return string
 */

function escape_string_like($string)
{
  global $db;
  // Backslash must be handled first so the backslashes added for % and _ are not doubled again
  $string = str_replace(array('\\', '%', '_'), array('\\\\', '\\%', '\\_'), $string);
  return $db->escape($string);
}
|
718 |
|
/**
 * Wraps <highlight></highlight> tags around every occurrence of each word from the given list in the text. Does not perform
 * any clipping. If the word list contains no usable words, or the pattern fails to run, the text is returned unchanged.
 * @param string Text to process
 * @param array Word list
 * @param bool If true, searches case-sensitively when highlighting words
 * @return string
 */

function highlight_search_result($pt, $words, $case_sensitive = false)
{
  $patterns = array();
  foreach ( $words as $word )
  {
    $word = (string) $word;
    if ( $word !== '' )
    {
      // Pass the delimiter so a "/" inside a search term cannot terminate the pattern early
      $patterns[] = preg_quote($word, '/');
    }
  }

  // An empty alternation would match the empty string at every offset and litter the text with empty tags
  if ( count($patterns) < 1 )
    return $pt;

  $flag = ( $case_sensitive ) ? '' : 'i';
  $regex = '/(' . implode('|', $patterns) . ')/' . $flag;
  $highlighted = preg_replace($regex, '<highlight>\\1</highlight>', $pt);

  // preg_replace() returns null on failure; never lose the original text because of that
  return ( $highlighted === null ) ? $pt : $highlighted;
}
|
742 |
|
743 /** |
|
744 * Wraps <highlight></highlight> tags around all words in both the specified array and the specified text and clips the text to |
|
745 * an appropriate length. |
|
746 * @param string Text to process |
|
747 * @param array Word list |
|
748 * @param bool If true, searches case-sensitively when highlighting words |
|
749 * @return string |
|
750 */ |
|
751 |
|
752 function highlight_and_clip_search_result($pt, $words, $case_sensitive = false) |
|
753 { |
|
754 $cut_off = false; |
|
755 |
|
756 $space_chars = Array("\t", "\n", "\r", " "); |
|
757 |
|
758 $pt = highlight_search_result($pt, $words, $case_sensitive); |
|
759 |
|
760 foreach ( $words as $word ) |
|
761 { |
|
762 // Boldface searched words |
|
763 $ptlen = strlen($pt); |
|
764 for ( $i = 0; $i < $ptlen; $i++ ) |
|
765 { |
|
766 $len = strlen($word); |
|
767 if ( strtolower(substr($pt, $i, $len)) == strtolower($word) ) |
|
768 { |
|
769 $chunk1 = substr($pt, 0, $i); |
|
770 $chunk2 = substr($pt, $i, $len); |
|
771 $chunk3 = substr($pt, ( $i + $len )); |
|
772 $pt = $chunk1 . $chunk2 . $chunk3; |
|
773 $ptlen = strlen($pt); |
|
774 // Cut off text to 150 chars or so |
|
775 if ( !$cut_off ) |
|
776 { |
|
777 $cut_off = true; |
|
778 if ( $i - 75 > 0 ) |
297 { |
779 { |
298 if($this->convertCase($userterm) == $this->convertCase($term)) |
780 // Navigate backwards until a space character is found |
|
781 $chunk = substr($pt, 0, ( $i - 75 )); |
|
782 $final_chunk = $chunk; |
|
783 for ( $j = strlen($chunk); $j > 0; $j = $j - 1 ) |
299 { |
784 { |
300 $k = explode(',', $keys); |
785 if ( in_array($chunk{$j}, $space_chars) ) |
301 foreach($k as $idxkey) |
|
302 { |
786 { |
303 $row[0] = $r[0]; |
787 $final_chunk = substr($chunk, $j + 1); |
304 $row[1] = $r[1]; |
788 break; |
305 if(!isset($row[1])) |
|
306 { |
|
307 echo('PHP PARSER BUG: $row[1] is set but not set... includes/search.php:'.__LINE__); |
|
308 $GLOBALS['template']->footer(); |
|
309 exit; |
|
310 } |
|
311 if($row[1] == $idxkey) |
|
312 $results[$idxkey] = $row[0]; |
|
313 else |
|
314 { |
|
315 if(preg_match('#^([0-9]+)$#', $idxkey)) |
|
316 { |
|
317 $idxkey = intval($idxkey); |
|
318 if($row[1] == $idxkey) $results[$idxkey] = $row[0]; |
|
319 } |
|
320 } |
|
321 } |
789 } |
322 } |
790 } |
|
791 $mid_chunk = substr($pt, ( $i - 75 ), 75); |
|
792 |
|
793 $clipped = '...' . $final_chunk . $mid_chunk . $chunk2; |
|
794 |
|
795 $chunk = substr($pt, ( $i + strlen($chunk2) + 75 )); |
|
796 $final_chunk = $chunk; |
|
797 for ( $j = 0; $j < strlen($chunk); $j++ ) |
|
798 { |
|
799 if ( in_array($chunk{$j}, $space_chars) ) |
|
800 { |
|
801 $final_chunk = substr($chunk, 0, $j); |
|
802 break; |
|
803 } |
|
804 } |
|
805 |
|
806 $end_chunk = substr($pt, ( $i + strlen($chunk2) ), 75 ); |
|
807 |
|
808 $clipped .= $end_chunk . $final_chunk . '...'; |
|
809 |
|
810 $pt = $clipped; |
323 } |
811 } |
|
812 else if ( strlen($pt) > 200 ) |
|
813 { |
|
814 $mid_chunk = substr($pt, ( $i - 75 ), 75); |
|
815 |
|
816 $clipped = $chunk1 . $chunk2; |
|
817 |
|
818 $chunk = substr($pt, ( $i + strlen($chunk2) + 75 )); |
|
819 $final_chunk = $chunk; |
|
820 for ( $j = 0; $j < strlen($chunk); $j++ ) |
|
821 { |
|
822 if ( in_array($chunk{$j}, $space_chars) ) |
|
823 { |
|
824 $final_chunk = substr($chunk, 0, $j); |
|
825 break; |
|
826 } |
|
827 } |
|
828 |
|
829 $end_chunk = substr($pt, ( $i + strlen($chunk2) ), 75 ); |
|
830 |
|
831 $clipped .= $end_chunk . $final_chunk . '...'; |
|
832 |
|
833 $pt = $clipped; |
|
834 |
|
835 } |
|
836 break 2; |
324 } |
837 } |
325 // Quoted terms |
838 } |
326 foreach($query['trm'] as $userterm) |
839 } |
327 { |
840 $cut_off = false; |
328 if(!preg_match('/[\s"\'~`!@#\$%\^&\*\(\)\{\}:;<>,.\/\?_-]/', $userterm)) continue; |
841 } |
329 if(strstr($this->convertCase($r[0]), $this->convertCase($userterm))) |
842 return $pt; |
330 { |
|
331 // We have a match! |
|
332 if(!isset($results[$r[1]])) $results[$r[1]] = $r[0]; |
|
333 } |
|
334 } |
|
335 } while( $r = $db->fetchrow_num($texts) ); |
|
336 } |
|
337 // Remove excluded terms |
|
338 foreach($results as $k => $r) |
|
339 { |
|
340 foreach($query['not'] as $not) |
|
341 { |
|
342 if(strstr($this->convertCase($r), $this->convertCase($not))) unset($results[$k]); |
|
343 } |
|
344 } |
|
345 if(!$any) |
|
346 { |
|
347 // Remove results not containing all terms |
|
348 foreach($results as $k => $r) |
|
349 { |
|
350 foreach($query['any'] as $term) |
|
351 { |
|
352 if(!strstr($this->convertCase($r), $this->convertCase($term))) unset($results[$k]); |
|
353 } |
|
354 } |
|
355 } |
|
356 // Remove results not containing all terms |
|
357 foreach($results as $k => $r) |
|
358 { |
|
359 foreach($query['req'] as $term) |
|
360 { |
|
361 if(!strstr($this->convertCase($r), $this->convertCase($term))) unset($results[$k]); |
|
362 } |
|
363 } |
|
364 return $results; |
|
365 } |
|
366 |
|
/**
 * Collapses every "any" term of a parsed query into one space-separated term.
 * The 'any' entry is removed and re-added, so it ends up as the last key of the returned array.
 * @param array Parsed query; must contain an 'any' list
 * @return array The query with 'any' replaced by a single-element list
 */

function concatQueryTerms($query)
{
  $phrase = implode(' ', $query['any']);

  // Drop the old entry first so that the array union below appends the new 'any' after the remaining keys.
  $others = $query;
  unset($others['any']);

  return $others + array('any' => array($phrase));
}
|
374 |
|
/**
 * Builds a basic assoc array with a more organized version of the query.
 *
 * The query is split on spaces that are not enclosed in double quotes (a quote preceded by a backslash does
 * not close a quoted phrase). Each resulting term is then sorted into one of three lists:
 *   req: terms prefixed with "+" (optionally quoted, e.g. +"two words")
 *   not: terms prefixed with "-" (optionally quoted)
 *   any: everything else (surrounding quotes are stripped)
 * Terms shorter than 4 characters and terms already present in their list are dropped, and a warning is
 * recorded through $this->warn(). At most 20 terms are accepted.
 * @param string Raw search query
 * @return array Associative array with the keys 'any', 'req' and 'not', each holding a list of term strings
 */

function parseQuery($query)
{
  $ret = array(
      'any' => array(),
      'req' => array(),
      'not' => array()
    );

  // Pass 1: split the query into raw terms, honoring double-quoted phrases.
  $terms = array();
  $in_quote = false;
  $start_term = 0;
  $query_len = strlen($query);
  for ( $i = 0; $i < $query_len; $i++ )
  {
    // Square-bracket offsets: the curly-brace form ($query{$i}) no longer parses on PHP 8.
    $chr = $query[$i];
    $prev = ( $i > 0 ) ? $query[$i - 1] : '';
    $is_last = ( $i + 1 == $query_len );
    $is_separator = ( $chr == ' ' && !$in_quote );

    if ( $is_separator || $is_last )
    {
      // A separating space is never part of the term, even when it is the last character of the query;
      // any other final character (including a closing quote) is.
      $end = ( $is_separator ) ? $i : $i + 1;
      $word = (string) substr($query, $start_term, $end - $start_term);
      // Runs of spaces produce empty terms; skip them instead of warning about a too-short term.
      if ( $word !== '' )
      {
        $terms[] = $word;
      }
      $start_term = $i + 1;
    }
    elseif ( $chr == '"' && $in_quote && $prev != '\\' )
    {
      $in_quote = false;
    }
    elseif ( $chr == '"' && !$in_quote )
    {
      $in_quote = true;
    }
  }

  // Pass 2: classify each term and validate it.
  $accepted = 0;
  foreach ( $terms as $atom )
  {
    // Stop once 20 terms have been accepted, so the limit matches the number quoted in the warning.
    if ( $accepted >= 20 )
    {
      $this->warn('Some of your search terms were excluded because searches are limited to 20 terms to prevent excessive server load.');
      break;
    }

    // A leading + or - selects the list; $atom is never empty here.
    $bucket = 'any';
    $body = $atom;
    if ( $atom[0] == '+' || $atom[0] == '-' )
    {
      $bucket = ( $atom[0] == '+' ) ? 'req' : 'not';
      $body = (string) substr($atom, 1);
    }

    // Strip one pair of enclosing double quotes, if present.
    $word = $body;
    if ( $body !== '' && $body[0] == '"' && substr($body, -1) == '"' )
    {
      $word = (string) substr($body, 1, -1);
    }

    if ( strlen($word) < 4 )
    {
      $this->warn('One or more of your search terms was excluded because terms must be at least 4 characters in length.');
      continue;
    }
    // Strict comparison: a loose in_array() would treat numeric strings such as "0123" and "123" as duplicates.
    if ( in_array($word, $ret[$bucket], true) )
    {
      $this->warn('One or more of your search terms was excluded because duplicate terms were encountered.');
      continue;
    }

    $ret[$bucket][] = $word;
    $accepted++;
  }
  return $ret;
}
|
538 |
|
/**
 * Wraps the first occurrence of each "any"/"req" term in every entry of $this->results with the given tags
 * and clips the entry to a window around that occurrence (75 bytes before it, the tagged term, 75 bytes
 * after it), surrounded by "..." markers. Terms are compared through $this->convertCase().
 * Entries are rewritten in place in $this->results; nothing is returned.
 * @param array Parsed query with 'any' and 'req' term lists
 * @param string Markup inserted before a matched term
 * @param string Markup inserted after a matched term
 */

function highlightResults($query, $starttag = '<b>', $endtag = '</b>')
{
  $query['trm'] = array_merge($query['any'], $query['req']);
  foreach ( $query['trm'] as $needle )
  {
    $needle_len = strlen($needle);
    foreach ( $this->results as $key => $text )
    {
      $text_len = strlen($text);
      $offset = 0;
      while ( $offset < $text_len )
      {
        $candidate = substr($text, $offset, $needle_len);
        if ( $this->convertCase($candidate) != $this->convertCase($needle) )
        {
          $offset++;
          continue;
        }

        // First match for this term: tag it, then keep only the surrounding window.
        $marked = $starttag . $candidate . $endtag;
        $highlighted = substr($text, 0, $offset) . $marked . substr($text, $offset + $needle_len, $text_len + 999999);
        $window_start = max(0, $offset - 75);
        $this->results[$key] = '...' . trim(substr($highlighted, $window_start, strlen($marked) + 150)) . '...';

        // Only the first occurrence per result is highlighted; move on to the next result.
        break;
      }
    }
  }
}
|
565 |
|
566 } |
843 } |
567 |
844 |
568 /** |
845 /** |
569 * Developer-friendly way to do searches. :-) Uses the MySQL FULLTEXT index type. |
846 * Returns a list of words that shouldn't under most circumstances be indexed for searching. Kudos to MySQL. |
570 * @package Enano |
847 * @return array |
571 * @subpackage Search |
848 * @see http://dev.mysql.com/doc/refman/5.0/en/fulltext-stopwords.html |
572 */ |
849 */ |
573 |
850 |
574 class MySQL_Fulltext_Search { |
851 function get_stopwords() |
575 |
852 { |
576 /** |
853 static $stopwords; |
577 * Performs a search. |
854 if ( is_array($stopwords) ) |
578 * @param string The search query |
855 return $stopwords; |
579 * @return resource MySQL result resource - this is an UNBUFFERED query. |
856 |
580 */ |
857 $stopwords = array('a\'s', 'able', 'after', 'afterwards', 'again', |
581 |
858 'against', 'ain\'t', 'all', 'almost', 'alone', 'along', 'already', 'also', 'although', 'always', |
582 function search($query) |
859 'am', 'among', 'amongst', 'an', 'and', 'another', 'any', 'anybody', 'anyhow', 'anyone', 'anything', 'anyway', |
583 { |
860 'anyways', 'anywhere', 'apart', 'appear', 'appreciate', 'appropriate', 'are', 'aren\'t', 'around', 'as', 'aside', |
584 global $db, $session, $paths, $template, $plugins; // Common objects |
861 'ask', 'asking', 'associated', 'at', 'available', 'away', 'awfully', 'be', 'became', 'because', 'become', 'becomes', |
585 |
862 'becoming', 'been', 'before', 'beforehand', 'behind', 'being', 'believe', 'below', 'beside', 'besides', 'best', |
586 $fulltext_col = 'MATCH(t.page_id,t.namespace,p.name,t.page_text) AGAINST (\'' . $db->escape($query) . '\' IN BOOLEAN MODE)'; |
863 'better', 'between', 'beyond', 'both', 'brief', 'but', 'by', 'c\'mon', 'c\'s', 'came', 'can', 'can\'t', 'cannot', |
587 $sql = "SELECT t.page_text,CONCAT('ns=',t.namespace,';pid=',t.page_id) AS page_identifier, $fulltext_col AS score, CHAR_LENGTH(t.page_text) AS length FROM ".table_prefix."page_text AS t |
864 'cant', 'cause', 'causes', 'certain', 'certainly', 'changes', 'clearly', 'co', 'com', 'come', 'comes', 'concerning', |
588 LEFT JOIN ".table_prefix."pages AS p |
865 'consequently', 'consider', 'considering', 'contain', 'containing', 'contains', 'corresponding', 'could', |
589 ON ( p.urlname=t.page_id AND p.namespace=t.namespace) |
866 'couldn\'t', 'course', 'despite', 'did', 'didn\'t', 'different', 'do', |
590 WHERE $fulltext_col > 0 |
867 'does', 'doesn\'t', 'doing', 'don\'t', 'done', 'down', 'downwards', 'during', 'each', 'edu', 'eg', 'eight', |
591 AND p.visible=1 |
868 'either', 'else', 'elsewhere', 'enough', 'entirely', 'especially', 'et', 'etc', 'even', 'ever', 'every', |
592 ORDER BY score DESC;"; |
869 'everybody', 'everyone', 'everything', 'everywhere', 'ex', 'exactly', 'example', 'except', 'far', 'few', 'fifth', |
593 $q = $db->sql_unbuffered_query($sql); |
870 'first', 'five', 'followed', 'following', 'follows', 'for', 'former', 'formerly', 'forth', 'four', 'from', |
594 if ( !$q ) |
871 'further', 'get', 'gets', 'getting', 'given', 'gives', 'go', 'goes', 'going', 'gone', 'got', |
595 $db->_die(); |
872 'gotten', 'had', 'hadn\'t', 'happens', 'hardly', 'has', 'hasn\'t', 'have', 'haven\'t', 'having', |
596 |
873 'he', 'he\'s', 'hello', 'help', 'hence', 'her', 'here', 'here\'s', 'hereafter', 'hereby', 'herein', 'hereupon', |
597 return $q; |
874 'hers', 'herself', 'hi', 'him', 'himself', 'his', 'hither', 'hopefully', 'how', 'howbeit', 'however', 'i\'d', |
598 } |
875 'i\'ll', 'i\'m', 'i\'ve', 'ie', 'if', 'ignored', 'immediate', 'in', 'inasmuch', 'inc', 'indeed', 'indicate', |
599 |
876 'indicated', 'indicates', 'inner', 'insofar', 'instead', 'into', 'inward', 'is', 'isn\'t', 'it', 'it\'d', 'it\'ll', |
600 function highlight_result($query, $result) |
877 'it\'s', 'its', 'itself', 'just', 'keep', 'keeps', 'kept', 'know', 'knows', 'known', 'last', 'lately', 'later', |
601 { |
878 'latter', 'latterly', 'least', 'less', 'lest', 'let', 'let\'s', 'like', 'liked', 'likely', 'little', 'look', |
602 global $db, $session, $paths, $template, $plugins; // Common objects |
879 'looking', 'looks', 'ltd', 'mainly', 'many', 'may', 'maybe', 'me', 'mean', 'meanwhile', 'merely', 'might', 'more', |
603 $search = new Searcher(); |
880 'moreover', 'most', 'mostly', 'much', 'must', 'my', 'myself', 'name', 'namely', 'nd', 'near', 'nearly', 'necessary', |
604 $parsed_query = $search->parseQuery($query); |
881 'need', 'needs', 'neither', 'never', 'nevertheless', 'new', 'next', 'nine', 'no', 'nobody', 'non', 'none', 'noone', |
605 return $this->highlight_result_inner($query, $result); |
882 'nor', 'normally', 'not', 'nothing', 'novel', 'now', 'nowhere', 'obviously', 'of', 'off', 'often', 'oh', 'ok', |
606 } |
883 'okay', 'old', 'on', 'once', 'one', 'ones', 'only', 'onto', 'or', 'other', 'others', 'otherwise', 'ought', 'our', |
607 |
884 'ours', 'ourselves', 'out', 'outside', 'over', 'overall', 'own', 'particular', 'particularly', 'per', 'perhaps', |
608 function highlight_result_inner($query, $fulltext, $starttag = '<b>', $endtag = '</b>') |
885 'placed', 'please', 'plus', 'possible', 'presumably', 'probably', 'provides', 'que', 'quite', 'qv', 'rather', 'rd', |
609 { |
886 're', 'really', 'reasonably', 'regarding', 'regardless', 'regards', 'relatively', 'respectively', 'right', 'said', |
610 $result = false; |
887 'same', 'saw', 'say', 'saying', 'says', 'second', 'secondly', 'see', 'seeing', 'seem', 'seemed', 'seeming', 'seems', |
611 $query['trm'] = array_merge($query['any'], $query['req']); |
888 'seen', 'self', 'selves', 'sensible', 'sent', 'serious', 'seriously', 'seven', 'several', 'shall', 'she', 'should', |
612 //die('<pre>'.print_r($query, true).'</pre>'); |
889 'shouldn\'t', 'since', 'six', 'so', 'some', 'somebody', 'somehow', 'someone', 'something', 'sometime', 'sometimes', |
613 foreach($query['trm'] as $q) |
890 'somewhat', 'somewhere', 'soon', 'sorry', 'specified', 'specify', 'specifying', 'still', 'sub', 'such', 'sup', |
614 { |
891 'sure', 't\'s', 'take', 'taken', 'tell', 'tends', 'th', 'than', 'thank', 'thanks', 'thanx', 'that', 'that\'s', |
615 $startplace = 0; |
892 'thats', 'the', 'their', 'theirs', 'them', 'themselves', 'then', 'thence', 'there', 'there\'s', 'thereafter', |
616 //$this->results[$k] = htmlspecialchars($this->results[$k]); |
893 'thereby', 'therefore', 'therein', 'theres', 'thereupon', 'these', 'they', 'they\'d', 'they\'ll', 'they\'re', |
617 for($i = 0; $i < strlen($r); $i++) |
894 'they\'ve', 'think', 'third', 'this', 'thorough', 'thoroughly', 'those', 'though', 'three', 'through', 'throughout', |
618 { |
895 'thru', 'thus', 'to', 'together', 'too', 'took', 'toward', 'towards', 'tried', 'tries', 'truly', 'try', 'trying', |
619 $word = substr($r, $i, strlen($q)); |
896 'twice', 'two', 'un', 'under', 'unfortunately', 'unless', 'unlikely', 'until', 'unto', 'up', 'upon', 'us', 'use', |
620 if($this->convertCase($word) == $this->convertCase($q)) |
897 'used', 'useful', 'uses', 'using', 'usually', 'value', 'various', 'very', 'via', 'viz', 'vs', 'want', 'wants', |
621 { |
898 'was', 'wasn\'t', 'way', 'we', 'we\'d', 'we\'ll', 'we\'re', 'we\'ve', 'welcome', 'well', 'went', 'were', 'weren\'t', |
622 $word = $starttag . $word . $endtag; |
899 'what', 'what\'s', 'whatever', 'when', 'whence', 'whenever', 'where', 'where\'s', 'whereafter', 'whereas', |
623 $result = substr($fulltext, 0, $i) . $word . substr($r, $i + strlen($q), strlen($r)+99999999); |
900 'whereby', 'wherein', 'whereupon', 'wherever', 'whether', 'which', 'while', 'whither', 'who', 'who\'s', 'whoever', |
624 $startplace = $i - 75; |
901 'whole', 'whom', 'whose', 'why', 'will', 'willing', 'wish', 'with', 'within', 'without', 'won\'t', 'wonder', |
625 if($startplace < 0) $startplace = 0; |
902 'would', 'would', 'wouldn\'t', 'yes', 'yet', 'you', 'you\'d', 'you\'ll', 'you\'re', 'you\'ve', 'your', 'yours', |
626 $result = '...'.trim(substr($result, $startplace, strlen($word) + 150)).'...'; |
903 'yourself', 'yourselves', 'zero'); |
627 continue 2; |
904 return $stopwords; |
628 } |
|
629 } |
|
630 } |
|
631 return $result; |
|
632 } |
|
633 |
|
634 } |
905 } |
635 |
906 |
636 ?> |
907 ?> |