184 $term = strtolower($term); |
184 $term = strtolower($term); |
185 $where_any[] = $term; |
185 $where_any[] = $term; |
186 } |
186 } |
187 |
187 |
188 $col_word = ( $case_sensitive ) ? 'word' : 'word_lcase'; |
188 $col_word = ( $case_sensitive ) ? 'word' : 'word_lcase'; |
189 $where_any = ( count($where_any) > 0 ) ? '( ' . $col_word . ' LIKE \'%' . implode('%\' OR ' . $col_word . ' LIKE \'%', $where_any) . '%\' )' : ''; |
189 $where_any_str = ( count($where_any) > 0 ) ? '( ' . $col_word . ' LIKE \'%' . implode('%\' OR ' . $col_word . ' LIKE \'%', $where_any) . '%\' )' : ''; |
190 |
190 |
191 // generate query |
191 // generate query |
192 $sql = "SELECT word, page_names FROM " . table_prefix . "search_index WHERE {$where_any}"; |
192 $sql = "SELECT word, page_names FROM " . table_prefix . "search_index WHERE {$where_any_str}"; |
193 if ( !($q = $db->sql_query($sql)) ) |
193 if ( !($q = $db->sql_query($sql)) ) |
194 $db->_die('Error is in perform_search(), includes/search.php, query 1'); |
194 $db->_die('Error is in perform_search(), includes/search.php, query 1'); |
195 |
195 |
196 $word_tracking = array(); |
196 $word_tracking = array(); |
197 if ( $row = $db->fetchrow($q) ) |
197 if ( $row = $db->fetchrow($q) ) |
198 { |
198 { |
199 do |
199 do |
200 { |
200 { |
201 // get page list |
201 // get page list |
202 $pages =& $row['page_names']; |
202 $pages =& $row['page_names']; |
203 if ( strpos($pages, ',') ) |
203 |
204 { |
204 // Find page IDs that contain commas |
205 // the term occurs in more than one page |
205 // This should never happen because commas are escaped by sanitize_page_id(). Nevertheless for compatibility with older |
206 |
206 // databases, and to alleviate the concerns of hackers, we'll accommodate for page IDs with commas here by checking for |
207 // Find page IDs that contain commas |
207 // IDs that don't match the pattern for stringified page ID + namespace. If it doesn't match, that means it's a continuation |
208 // This should never happen because commas are escaped by sanitize_page_id(). Nevertheless for compatibility with older |
208 // of the previous ID and should be concatenated to the previous entry. |
209 // databases, and to alleviate the concerns of hackers, we'll accommodate for page IDs with commas here by checking for |
209 $matches = strpos($pages, ',') ? explode(',', $pages) : array($pages); |
210 // IDs that don't match the pattern for stringified page ID + namespace. If it doesn't match, that means it's a continuation |
210 $prev = false; |
211 // of the previous ID and should be concatenated to the previous entry. |
211 foreach ( $matches as $i => $_ ) |
212 $matches = explode(',', $pages); |
212 { |
213 $prev = false; |
213 $match =& $matches[$i]; |
214 foreach ( $matches as $i => $_ ) |
214 if ( !preg_match("/^ns=$ns_list;pid=(.+)$/", $match) && $prev ) |
215 { |
215 { |
216 $match =& $matches[$i]; |
216 $matches[$prev] .= ',' . $match; |
217 if ( !preg_match("/^ns=$ns_list;pid=(.+)$/", $match) && $prev ) |
217 unset($match, $matches[$i]); |
218 { |
218 continue; |
219 $matches[$prev] .= ',' . $match; |
219 } |
220 unset($match, $matches[$i]); |
220 $prev = $i; |
221 continue; |
221 } |
222 } |
222 unset($match); |
223 $prev = $i; |
223 |
224 } |
224 // Iterate through each of the results, assigning scores based on how many times the page has shown up. |
225 unset($match); |
225 // This works because this phase of the search is strongly word-based not page-based. If a page shows up |
226 |
226 // multiple times while fetching the result rows from the search_index table, it simply means that page |
227 // Iterate through each of the results, assigning scores based on how many times the page has shown up. |
227 // contains more than one of the terms the user searched for. |
228 // This works because this phase of the search is strongly word-based not page-based. If a page shows up |
228 |
229 // multiple times while fetching the result rows from the search_index table, it simply means that page |
229 foreach ( $matches as $match ) |
230 // contains more than one of the terms the user searched for. |
230 { |
231 |
231 $word_cs = (( $case_sensitive ) ? $row['word'] : strtolower($row['word'])); |
232 foreach ( $matches as $match ) |
232 if ( isset($word_tracking[$match]) && in_array($word_cs, $word_tracking[$match]) ) |
233 { |
233 { |
234 $word_cs = (( $case_sensitive ) ? $row['word'] : strtolower($row['word'])); |
234 continue; |
235 if ( isset($word_tracking[$match]) && in_array($word_cs, $word_tracking[$match]) ) |
235 } |
236 { |
236 if ( isset($word_tracking[$match]) ) |
237 continue; |
237 { |
238 } |
|
239 if ( isset($word_tracking[$match]) ) |
238 if ( isset($word_tracking[$match]) ) |
240 { |
239 { |
241 if ( isset($word_tracking[$match]) ) |
240 $word_tracking[$match][] = $word_cs; |
242 { |
|
243 $word_tracking[$match][] = ($word_cs); |
|
244 } |
|
245 } |
241 } |
246 else |
242 } |
247 { |
243 else |
248 $word_tracking[$match] = array($word_cs); |
244 { |
249 } |
245 $word_tracking[$match] = array($word_cs); |
250 $inc = 1; |
246 } |
251 |
|
252 // Is this search term present in the page's title? If so, give extra points |
|
253 preg_match("/^ns=$ns_list;pid=(.+)$/", $pages, $piecesparts); |
|
254 $title = get_page_title_ns($piecesparts[2], $piecesparts[1]); |
|
255 |
|
256 $test_func = ( $case_sensitive ) ? 'strstr' : 'stristr'; |
|
257 if ( $test_func($title, $row['word']) || $test_func($piecesparts[2], $row['word']) ) |
|
258 { |
|
259 $inc = 1.5; |
|
260 } |
|
261 |
247 |
262 if ( isset($scores[$match]) ) |
248 // echo '<pre>' . print_r($word_tracking, true) . '</pre>'; |
263 { |
|
264 $scores[$match] = $scores[$match] + $inc; |
|
265 } |
|
266 else |
|
267 { |
|
268 $scores[$match] = $inc; |
|
269 } |
|
270 } |
|
271 } |
|
272 else |
|
273 { |
|
274 // the term only occurs in one page |
|
275 $word_cs = (( $case_sensitive ) ? $row['word'] : strtolower($row['word'])); |
|
276 |
249 |
277 if ( isset($word_tracking[$pages]) && in_array($word_cs, $word_tracking[$pages]) ) |
|
278 { |
|
279 continue; |
|
280 } |
|
281 if ( isset($word_tracking[$pages]) ) |
|
282 { |
|
283 if ( isset($word_tracking[$pages]) ) |
|
284 { |
|
285 $word_tracking[$pages][] = ($word_cs); |
|
286 } |
|
287 } |
|
288 else |
|
289 { |
|
290 $word_tracking[$pages] = array($word_cs); |
|
291 } |
|
292 $inc = 1; |
250 $inc = 1; |
293 |
251 |
294 // Is this search term present in the page's title? If so, give extra points |
252 // Is this search term present in the page's title? If so, give extra points |
295 preg_match("/^ns=$ns_list;pid=(.+)$/", $pages, $piecesparts); |
253 preg_match("/^ns=$ns_list;pid=(.+)$/", $match, $piecesparts); |
296 $title = get_page_title_ns($piecesparts[2], $piecesparts[1]); |
254 $title = get_page_title_ns($piecesparts[2], $piecesparts[1]); |
297 |
255 |
298 $test_func = ( $case_sensitive ) ? 'strstr' : 'stristr'; |
256 $test_func = ( $case_sensitive ) ? 'strstr' : 'stristr'; |
299 if ( $test_func($title, $row['word']) || $test_func($piecesparts[2], $row['word']) ) |
257 if ( $test_func($title, $row['word']) || $test_func($piecesparts[2], $row['word']) ) |
300 { |
258 { |
301 $inc = 1.5; |
259 $inc = 1.5; |
302 } |
260 } |
303 |
261 |
304 if ( isset($scores[$pages]) ) |
262 // increase points if 2 or more words match a phrase in the title |
305 { |
263 for ( $i = 0; $i < count($where_any) - 1; $i++ ) |
306 $scores[$pages] = $scores[$pages] + $inc; |
264 { |
|
265 $phrase = "{$where_any[$i]} {$where_any[$i + 1]}"; |
|
266 if ( $test_func($title, $phrase) ) |
|
267 { |
|
268 $inc *= 1.25; |
|
269 } |
|
270 } |
|
271 |
|
272 // Deduct points if there are few similarities between the words |
|
273 $lev_array = array(); |
|
274 foreach ( $where_any as $qword ) |
|
275 { |
|
276 if ( strstr($word_cs, $qword) ) |
|
277 $lev_array[ $qword ] = levenshtein($qword, $word_cs); |
|
278 } |
|
279 if ( min($lev_array) > 3 ) |
|
280 { |
|
281 $inc /= array_sum($lev_array) / count($lev_array); |
|
282 } |
|
283 |
|
284 if ( isset($scores[$match]) ) |
|
285 { |
|
286 $scores[$match] = $scores[$match] + $inc; |
307 } |
287 } |
308 else |
288 else |
309 { |
289 { |
310 $scores[$pages] = $inc; |
290 $scores[$match] = $inc; |
311 } |
291 } |
312 } |
292 } |
313 } |
293 } |
314 while ( $row = $db->fetchrow($q) ); |
294 while ( $row = $db->fetchrow($q) ); |
315 } |
295 } |
316 $db->free_result($q); |
296 $db->free_result($q); |
317 |
297 |
318 // |
298 // |
319 // STAGE 2: FIRST ELIMINATION ROUND |
299 // STAGE 2: FIRST ELIMINATION ROUND |
320 // Iterate through the list of required terms. If a given page is not found to have the required term, eliminate it |
300 // Iterate through the list of required terms. If a given page is not found to have the required term, eliminate it |
321 // |
301 // |
322 |
302 |