--- a/includes/search.php Sun Mar 28 21:49:26 2010 -0400
+++ b/includes/search.php Sun Mar 28 23:10:46 2010 -0400
@@ -25,63 +25,63 @@
class Searcher
{
- var $results;
- var $index;
- var $warnings;
- var $match_case = false;
+ var $results;
+ var $index;
+ var $warnings;
+ var $match_case = false;
- function buildIndex($texts)
- {
- $this->index = Array();
- $stopwords = get_stopwords();
+ function buildIndex($texts)
+ {
+ $this->index = Array();
+ $stopwords = get_stopwords();
- foreach($texts as $i => $l)
- {
- $seed = md5(microtime(true) . mt_rand());
- $texts[$i] = str_replace("'", 'xxxApoS'.$seed.'xxx', $texts[$i]);
- $texts[$i] = preg_replace('#([\W_]+)#i', ' ', $texts[$i]);
- $texts[$i] = preg_replace('#([ ]+?)#', ' ', $texts[$i]);
- $texts[$i] = preg_replace('#([\']*){2,}#s', '', $texts[$i]);
- $texts[$i] = str_replace('xxxApoS'.$seed.'xxx', "'", $texts[$i]);
- $l = $texts[$i];
- $words = Array();
- $good_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\' ';
- $good_chars = enano_str_split($good_chars, 1);
- $letters = enano_str_split($l, 1);
- foreach($letters as $x => $t)
- {
- if(!in_array($t, $good_chars))
- unset($letters[$x]);
- }
- $letters = implode('', $letters);
- $words = explode(' ', $letters);
- foreach($words as $c => $w)
- {
- if(strlen($w) < 2 || in_array($w, $stopwords) || strlen($w) > 63 || preg_match('/[\']{2,}/', $w))
- unset($words[$c]);
- else
- $words[$c] = $w;
- }
- $words = array_values($words);
- foreach($words as $c => $w)
- {
- if(isset($this->index[$w]))
- {
- if(!in_array($i, $this->index[$w]))
- $this->index[$w][] = $i;
- }
- else
- {
- $this->index[$w] = Array();
- $this->index[$w][] = $i;
- }
- }
- }
- foreach($this->index as $k => $v)
- {
- $this->index[$k] = implode(',', $this->index[$k]);
- }
- }
+ foreach($texts as $i => $l)
+ {
+ $seed = md5(microtime(true) . mt_rand());
+ $texts[$i] = str_replace("'", 'xxxApoS'.$seed.'xxx', $texts[$i]);
+ $texts[$i] = preg_replace('#([\W_]+)#i', ' ', $texts[$i]);
+ $texts[$i] = preg_replace('#([ ]+?)#', ' ', $texts[$i]);
+ $texts[$i] = preg_replace('#([\']*){2,}#s', '', $texts[$i]);
+ $texts[$i] = str_replace('xxxApoS'.$seed.'xxx', "'", $texts[$i]);
+ $l = $texts[$i];
+ $words = Array();
+ $good_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\' ';
+ $good_chars = enano_str_split($good_chars, 1);
+ $letters = enano_str_split($l, 1);
+ foreach($letters as $x => $t)
+ {
+ if(!in_array($t, $good_chars))
+ unset($letters[$x]);
+ }
+ $letters = implode('', $letters);
+ $words = explode(' ', $letters);
+ foreach($words as $c => $w)
+ {
+ if(strlen($w) < 2 || in_array($w, $stopwords) || strlen($w) > 63 || preg_match('/[\']{2,}/', $w))
+ unset($words[$c]);
+ else
+ $words[$c] = $w;
+ }
+ $words = array_values($words);
+ foreach($words as $c => $w)
+ {
+ if(isset($this->index[$w]))
+ {
+ if(!in_array($i, $this->index[$w]))
+ $this->index[$w][] = $i;
+ }
+ else
+ {
+ $this->index[$w] = Array();
+ $this->index[$w][] = $i;
+ }
+ }
+ }
+ foreach($this->index as $k => $v)
+ {
+ $this->index[$k] = implode(',', $this->index[$k]);
+ }
+ }
}
/**
@@ -100,486 +100,486 @@
function perform_search($query, &$warnings, $case_sensitive = false, &$word_list)
{
- global $db, $session, $paths, $template, $plugins; // Common objects
- global $lang;
-
- $warnings = array();
-
- //
- // STAGE 0: PARSE SEARCH QUERY
- // Identify all terms of the query. Separate between what is required and what is not, and what should be sent through the index as
- // opposed to straight-out LIKE-selected.
- //
+ global $db, $session, $paths, $template, $plugins; // Common objects
+ global $lang;
+
+ $warnings = array();
+
+ //
+ // STAGE 0: PARSE SEARCH QUERY
+ // Identify all terms of the query. Separate between what is required and what is not, and what should be sent through the index as
+ // opposed to straight-out LIKE-selected.
+ //
- $query = parse_search_query($query, $warnings);
+ $query = parse_search_query($query, $warnings);
- // Segregate search terms containing spaces
- $query_phrase = array(
- 'any' => array(),
- 'req' => array()
- );
+ // Segregate search terms containing spaces
+ $query_phrase = array(
+ 'any' => array(),
+ 'req' => array()
+ );
- foreach ( $query['any'] as $i => $_ )
- {
- $term =& $query['any'][$i];
- $term = trim($term);
- // the indexer only indexes words a-z with apostrophes
- if ( preg_match('/[^A-Za-z\']/', $term) )
- {
- $query_phrase['any'][] = $term;
- unset($term, $query['any'][$i]);
- }
- }
- unset($term);
- $query['any'] = array_values($query['any']);
+ foreach ( $query['any'] as $i => $_ )
+ {
+ $term =& $query['any'][$i];
+ $term = trim($term);
+ // the indexer only indexes words a-z with apostrophes
+ if ( preg_match('/[^A-Za-z\']/', $term) )
+ {
+ $query_phrase['any'][] = $term;
+ unset($term, $query['any'][$i]);
+ }
+ }
+ unset($term);
+ $query['any'] = array_values($query['any']);
- foreach ( $query['req'] as $i => $_ )
- {
- $term =& $query['req'][$i];
- $term = trim($term);
- if ( preg_match('/[^A-Za-z\']/', $term) )
- {
- $query_phrase['req'][] = $term;
- unset($term, $query['req'][$i]);
- }
- }
- unset($term);
- $query['req'] = array_values($query['req']);
+ foreach ( $query['req'] as $i => $_ )
+ {
+ $term =& $query['req'][$i];
+ $term = trim($term);
+ if ( preg_match('/[^A-Za-z\']/', $term) )
+ {
+ $query_phrase['req'][] = $term;
+ unset($term, $query['req'][$i]);
+ }
+ }
+ unset($term);
+ $query['req'] = array_values($query['req']);
- $results = array();
- $scores = array();
- $ns_list = '(' . implode('|', array_keys($paths->nslist)) . ')';
+ $results = array();
+ $scores = array();
+ $ns_list = '(' . implode('|', array_keys($paths->nslist)) . ')';
- // FIXME: Update to use FULLTEXT algo when available.
+ // FIXME: Update to use FULLTEXT algo when available.
- // Build an SQL query to load from the index table
- if ( count($query['any']) < 1 && count($query['req']) < 1 && count($query_phrase['any']) < 1 && count($query_phrase['req']) < 1 )
- {
- // This is both because of technical restrictions and devastation that would occur on shared servers/large sites.
- $warnings[] = $lang->get('search_err_query_no_positive');
- return array();
- }
+ // Build an SQL query to load from the index table
+ if ( count($query['any']) < 1 && count($query['req']) < 1 && count($query_phrase['any']) < 1 && count($query_phrase['req']) < 1 )
+ {
+ // This is both because of technical restrictions and devastation that would occur on shared servers/large sites.
+ $warnings[] = $lang->get('search_err_query_no_positive');
+ return array();
+ }
- //
- // STAGE 1
- // Get all possible result pages from the search index. Tally which pages have the most words, and later sort them by boolean relevance
- //
+ //
+ // STAGE 1
+ // Get all possible result pages from the search index. Tally which pages have the most words, and later sort them by boolean relevance
+ //
- // Skip this if no indexable words are included
+ // Skip this if no indexable words are included
- if ( count($query['any']) > 0 || count($query['req']) > 0 )
- {
- $where_any = array();
- foreach ( $query['any'] as $term )
- {
- $term = escape_string_like($term);
- if ( !$case_sensitive )
- $term = strtolower($term);
- $where_any[] = $term;
- }
- foreach ( $query['req'] as $term )
- {
- $term = escape_string_like($term);
- if ( !$case_sensitive )
- $term = strtolower($term);
- $where_any[] = $term;
- }
+ if ( count($query['any']) > 0 || count($query['req']) > 0 )
+ {
+ $where_any = array();
+ foreach ( $query['any'] as $term )
+ {
+ $term = escape_string_like($term);
+ if ( !$case_sensitive )
+ $term = strtolower($term);
+ $where_any[] = $term;
+ }
+ foreach ( $query['req'] as $term )
+ {
+ $term = escape_string_like($term);
+ if ( !$case_sensitive )
+ $term = strtolower($term);
+ $where_any[] = $term;
+ }
- $col_word = ( $case_sensitive ) ? 'word' : 'word_lcase';
- $where_any_str = ( count($where_any) > 0 ) ? '( ' . $col_word . ' LIKE \'%' . implode('%\' OR ' . $col_word . ' LIKE \'%', $where_any) . '%\' )' : '';
+ $col_word = ( $case_sensitive ) ? 'word' : 'word_lcase';
+ $where_any_str = ( count($where_any) > 0 ) ? '( ' . $col_word . ' LIKE \'%' . implode('%\' OR ' . $col_word . ' LIKE \'%', $where_any) . '%\' )' : '';
- // generate query
- $sql = "SELECT word, page_names FROM " . table_prefix . "search_index WHERE {$where_any_str}";
- if ( !($q = $db->sql_query($sql)) )
- $db->_die('Error is in perform_search(), includes/search.php, query 1');
+ // generate query
+ $sql = "SELECT word, page_names FROM " . table_prefix . "search_index WHERE {$where_any_str}";
+ if ( !($q = $db->sql_query($sql)) )
+ $db->_die('Error is in perform_search(), includes/search.php, query 1');
- $word_tracking = array();
- if ( $row = $db->fetchrow($q) )
- {
- do
- {
- // get page list
- $pages =& $row['page_names'];
-
- // Find page IDs that contain commas
- // This should never happen because commas are escaped by sanitize_page_id(). Nevertheless for compatibility with older
- // databases, and to alleviate the concerns of hackers, we'll accommodate for page IDs with commas here by checking for
- // IDs that don't match the pattern for stringified page ID + namespace. If it doesn't match, that means it's a continuation
- // of the previous ID and should be concatenated to the previous entry.
- $matches = strpos($pages, ',') ? explode(',', $pages) : array($pages);
- $prev = false;
- foreach ( $matches as $i => $_ )
- {
- $match =& $matches[$i];
- if ( !preg_match("/^ns=$ns_list;pid=(.+)$/", $match) && $prev )
- {
- $matches[$prev] .= ',' . $match;
- unset($match, $matches[$i]);
- continue;
- }
- $prev = $i;
- }
- unset($match);
+ $word_tracking = array();
+ if ( $row = $db->fetchrow($q) )
+ {
+ do
+ {
+ // get page list
+ $pages =& $row['page_names'];
+
+ // Find page IDs that contain commas
+ // This should never happen because commas are escaped by sanitize_page_id(). Nevertheless for compatibility with older
+ // databases, and to alleviate the concerns of hackers, we'll accommodate for page IDs with commas here by checking for
+ // IDs that don't match the pattern for stringified page ID + namespace. If it doesn't match, that means it's a continuation
+ // of the previous ID and should be concatenated to the previous entry.
+ $matches = strpos($pages, ',') ? explode(',', $pages) : array($pages);
+ $prev = false;
+ foreach ( $matches as $i => $_ )
+ {
+ $match =& $matches[$i];
+ if ( !preg_match("/^ns=$ns_list;pid=(.+)$/", $match) && $prev )
+ {
+ $matches[$prev] .= ',' . $match;
+ unset($match, $matches[$i]);
+ continue;
+ }
+ $prev = $i;
+ }
+ unset($match);
- // Iterate through each of the results, assigning scores based on how many times the page has shown up.
- // This works because this phase of the search is strongly word-based not page-based. If a page shows up
- // multiple times while fetching the result rows from the search_index table, it simply means that page
- // contains more than one of the terms the user searched for.
+ // Iterate through each of the results, assigning scores based on how many times the page has shown up.
+ // This works because this phase of the search is strongly word-based not page-based. If a page shows up
+ // multiple times while fetching the result rows from the search_index table, it simply means that page
+ // contains more than one of the terms the user searched for.
- foreach ( $matches as $match )
- {
- $word_cs = (( $case_sensitive ) ? $row['word'] : strtolower($row['word']));
- if ( isset($word_tracking[$match]) && in_array($word_cs, $word_tracking[$match]) )
- {
- continue;
- }
- if ( isset($word_tracking[$match]) )
- {
- if ( isset($word_tracking[$match]) )
- {
- $word_tracking[$match][] = $word_cs;
- }
- }
- else
- {
- $word_tracking[$match] = array($word_cs);
- }
-
- // echo '<pre>' . print_r($word_tracking, true) . '</pre>';
-
- $inc = 1;
+ foreach ( $matches as $match )
+ {
+ $word_cs = (( $case_sensitive ) ? $row['word'] : strtolower($row['word']));
+ if ( isset($word_tracking[$match]) && in_array($word_cs, $word_tracking[$match]) )
+ {
+ continue;
+ }
+ if ( isset($word_tracking[$match]) )
+ {
+ if ( isset($word_tracking[$match]) )
+ {
+ $word_tracking[$match][] = $word_cs;
+ }
+ }
+ else
+ {
+ $word_tracking[$match] = array($word_cs);
+ }
+
+ // echo '<pre>' . print_r($word_tracking, true) . '</pre>';
+
+ $inc = 1;
- // Is this search term present in the page's title? If so, give extra points
- preg_match("/^ns=$ns_list;pid=(.+)$/", $match, $piecesparts);
- $title = get_page_title_ns($piecesparts[2], $piecesparts[1]);
-
- $test_func = ( $case_sensitive ) ? 'strstr' : 'stristr';
- if ( $test_func($title, $row['word']) || $test_func($piecesparts[2], $row['word']) )
- {
- $inc = 1.5;
- }
-
- // increase points if 2 or more words match a phrase in the title
- for ( $i = 0; $i < count($where_any) - 1; $i++ )
- {
- $phrase = "{$where_any[$i]} {$where_any[$i + 1]}";
- if ( $test_func($title, $phrase) )
- {
- $inc *= 1.25;
- }
- }
-
- // Deduct points if there are few similarities between the words
- $lev_array = array();
- foreach ( $where_any as $qword )
- {
- if ( strstr($word_cs, $qword) )
- $lev_array[ $qword ] = levenshtein($qword, $word_cs);
- }
- if ( min($lev_array) > 3 )
- {
- $inc /= array_sum($lev_array) / count($lev_array);
- }
-
- if ( isset($scores[$match]) )
- {
- $scores[$match] = $scores[$match] + $inc;
- }
- else
- {
- $scores[$match] = $inc;
- }
- }
- }
- while ( $row = $db->fetchrow($q) );
- }
- $db->free_result($q);
-
- //
- // STAGE 2: FIRST ELIMINATION ROUND
- // Iterate through the list of required terms. If a given page is not found to have the required term, eliminate it
- //
+ // Is this search term present in the page's title? If so, give extra points
+ preg_match("/^ns=$ns_list;pid=(.+)$/", $match, $piecesparts);
+ $title = get_page_title_ns($piecesparts[2], $piecesparts[1]);
+
+ $test_func = ( $case_sensitive ) ? 'strstr' : 'stristr';
+ if ( $test_func($title, $row['word']) || $test_func($piecesparts[2], $row['word']) )
+ {
+ $inc = 1.5;
+ }
+
+ // increase points if 2 or more words match a phrase in the title
+ for ( $i = 0; $i < count($where_any) - 1; $i++ )
+ {
+ $phrase = "{$where_any[$i]} {$where_any[$i + 1]}";
+ if ( $test_func($title, $phrase) )
+ {
+ $inc *= 1.25;
+ }
+ }
+
+ // Deduct points if there are few similarities between the words
+ $lev_array = array();
+ foreach ( $where_any as $qword )
+ {
+ if ( strstr($word_cs, $qword) )
+ $lev_array[ $qword ] = levenshtein($qword, $word_cs);
+ }
+ if ( min($lev_array) > 3 )
+ {
+ $inc /= array_sum($lev_array) / count($lev_array);
+ }
+
+ if ( isset($scores[$match]) )
+ {
+ $scores[$match] = $scores[$match] + $inc;
+ }
+ else
+ {
+ $scores[$match] = $inc;
+ }
+ }
+ }
+ while ( $row = $db->fetchrow($q) );
+ }
+ $db->free_result($q);
+
+ //
+ // STAGE 2: FIRST ELIMINATION ROUND
+ // Iterate through the list of required terms. If a given page is not found to have the required term, eliminate it
+ //
- foreach ( $query['req'] as $term )
- {
- foreach ( $word_tracking as $i => $page )
- {
- if ( !in_array($term, $page) )
- {
- unset($word_tracking[$i], $scores[$i]);
- }
- }
- }
- }
-
- //
- // STAGE 3: PHRASE SEARCHING
- // Use LIKE to find pages with specified phrases. We can do a super-picky single query without another elimination round because
- // at this stage we can search the full page_text column instead of relying on a word list.
- //
+ foreach ( $query['req'] as $term )
+ {
+ foreach ( $word_tracking as $i => $page )
+ {
+ if ( !in_array($term, $page) )
+ {
+ unset($word_tracking[$i], $scores[$i]);
+ }
+ }
+ }
+ }
+
+ //
+ // STAGE 3: PHRASE SEARCHING
+ // Use LIKE to find pages with specified phrases. We can do a super-picky single query without another elimination round because
+ // at this stage we can search the full page_text column instead of relying on a word list.
+ //
- // We can skip this stage if none of these special terms apply
+ // We can skip this stage if none of these special terms apply
- $text_col = ( $case_sensitive ) ? 'page_text' : ENANO_SQLFUNC_LOWERCASE . '(page_text)';
- $name_col = ( $case_sensitive ) ? 'name' : ENANO_SQLFUNC_LOWERCASE . '(name)';
- $text_col_join = ( $case_sensitive ) ? 't.page_text' : ENANO_SQLFUNC_LOWERCASE . '(t.page_text)';
- $name_col_join = ( $case_sensitive ) ? 'p.name' : ENANO_SQLFUNC_LOWERCASE . '(p.name)';
-
- $concat_column = ( ENANO_DBLAYER == 'MYSQL' ) ?
- 'CONCAT(\'ns=\',t.namespace,\';pid=\',t.page_id)' :
- "'ns=' || t.namespace || ';pid=' || t.page_id";
+ $text_col = ( $case_sensitive ) ? 'page_text' : ENANO_SQLFUNC_LOWERCASE . '(page_text)';
+ $name_col = ( $case_sensitive ) ? 'name' : ENANO_SQLFUNC_LOWERCASE . '(name)';
+ $text_col_join = ( $case_sensitive ) ? 't.page_text' : ENANO_SQLFUNC_LOWERCASE . '(t.page_text)';
+ $name_col_join = ( $case_sensitive ) ? 'p.name' : ENANO_SQLFUNC_LOWERCASE . '(p.name)';
+
+ $concat_column = ( ENANO_DBLAYER == 'MYSQL' ) ?
+ 'CONCAT(\'ns=\',t.namespace,\';pid=\',t.page_id)' :
+ "'ns=' || t.namespace || ';pid=' || t.page_id";
- if ( count($query_phrase['any']) > 0 || count($query_phrase['req']) > 0 )
- {
+ if ( count($query_phrase['any']) > 0 || count($query_phrase['req']) > 0 )
+ {
- $where_any = array();
- foreach ( $query_phrase['any'] as $term )
- {
- $term = escape_string_like($term);
- if ( !$case_sensitive )
- $term = strtolower($term);
- $where_any[] = "( $text_col LIKE '%$term%' OR $name_col LIKE '%$term%' )";
- }
+ $where_any = array();
+ foreach ( $query_phrase['any'] as $term )
+ {
+ $term = escape_string_like($term);
+ if ( !$case_sensitive )
+ $term = strtolower($term);
+ $where_any[] = "( $text_col LIKE '%$term%' OR $name_col LIKE '%$term%' )";
+ }
- $where_any = ( count($where_any) > 0 ) ? implode(" OR\n ", $where_any) : '';
+ $where_any = ( count($where_any) > 0 ) ? implode(" OR\n ", $where_any) : '';
- // Also do required terms, but use AND to ensure that all required terms are included
- $where_req = array();
- foreach ( $query_phrase['req'] as $term )
- {
- $term = escape_string_like($term);
- if ( !$case_sensitive )
- $term = strtolower($term);
- $where_req[] = "( $text_col LIKE '%$term%' OR $name_col LIKE '%$term%' )";
- }
- $and_clause = ( $where_any != '' ) ? 'AND ' : '';
- $where_req = ( count($where_req) > 0 ) ? "{$and_clause}" . implode(" AND\n ", $where_req) : '';
+ // Also do required terms, but use AND to ensure that all required terms are included
+ $where_req = array();
+ foreach ( $query_phrase['req'] as $term )
+ {
+ $term = escape_string_like($term);
+ if ( !$case_sensitive )
+ $term = strtolower($term);
+ $where_req[] = "( $text_col LIKE '%$term%' OR $name_col LIKE '%$term%' )";
+ }
+ $and_clause = ( $where_any != '' ) ? 'AND ' : '';
+ $where_req = ( count($where_req) > 0 ) ? "{$and_clause}" . implode(" AND\n ", $where_req) : '';
- $sql = 'SELECT ' . $concat_column . ' AS id, p.name, t.page_text FROM ' . table_prefix . "page_text AS t\n"
- . " LEFT JOIN " . table_prefix . "pages AS p\n"
- . " ON ( p.urlname = t.page_id AND p.namespace = t.namespace )\n"
- . " WHERE p.visible = 1 AND (\n $where_any\n $where_req\n );";
- if ( !($q = $db->sql_query($sql)) )
- $db->_die('Error is in perform_search(), includes/search.php, query 2. Parsed query dump follows:<pre>(indexable) ' . htmlspecialchars(print_r($query, true)) . '(non-indexable) ' . htmlspecialchars(print_r($query_phrase, true)) . '</pre>');
+ $sql = 'SELECT ' . $concat_column . ' AS id, p.name, t.page_text FROM ' . table_prefix . "page_text AS t\n"
+ . " LEFT JOIN " . table_prefix . "pages AS p\n"
+ . " ON ( p.urlname = t.page_id AND p.namespace = t.namespace )\n"
+ . " WHERE p.visible = 1 AND (\n $where_any\n $where_req\n );";
+ if ( !($q = $db->sql_query($sql)) )
+ $db->_die('Error is in perform_search(), includes/search.php, query 2. Parsed query dump follows:<pre>(indexable) ' . htmlspecialchars(print_r($query, true)) . '(non-indexable) ' . htmlspecialchars(print_r($query_phrase, true)) . '</pre>');
- if ( $row = $db->fetchrow() )
- {
- do
- {
- $id =& $row['id'];
- $inc = 0.0;
+ if ( $row = $db->fetchrow() )
+ {
+ do
+ {
+ $id =& $row['id'];
+ $inc = 0.0;
- $title = $row['name'];
- $test_func = ( $case_sensitive ) ? 'strstr' : 'stristr';
-
- // Is this search term present in the page's title? If so, give extra points
- $word_list = array_merge($query_phrase['any'], $query_phrase['req']);
- foreach ( $word_list as $word )
- {
- if ( $test_func($title, $word) )
- $inc += 1.5;
- else if ( $test_func($row['page_text'], $word) )
- $inc += 1.0;
- }
-
- // increase points if 2 or more words match a phrase in the title
- for ( $i = 0; $i < count($word_list) - 1; $i++ )
- {
- $phrase = "{$word_list[$i]} {$word_list[$i + 1]}";
- if ( $test_func($title, $phrase) )
- $inc *= 1.25;
- else if ( $test_func($row['page_text'], $phrase) )
- $inc *= 1.125;
- }
-
- if ( isset($scores[$id]) )
- {
- $scores[$id] = $scores[$id] + $inc;
- }
- else
- {
- $scores[$id] = $inc;
- }
- }
- while ( $row = $db->fetchrow() );
- }
- $db->free_result();
- }
+ $title = $row['name'];
+ $test_func = ( $case_sensitive ) ? 'strstr' : 'stristr';
+
+ // Is this search term present in the page's title? If so, give extra points
+ $word_list = array_merge($query_phrase['any'], $query_phrase['req']);
+ foreach ( $word_list as $word )
+ {
+ if ( $test_func($title, $word) )
+ $inc += 1.5;
+ else if ( $test_func($row['page_text'], $word) )
+ $inc += 1.0;
+ }
+
+ // increase points if 2 or more words match a phrase in the title
+ for ( $i = 0; $i < count($word_list) - 1; $i++ )
+ {
+ $phrase = "{$word_list[$i]} {$word_list[$i + 1]}";
+ if ( $test_func($title, $phrase) )
+ $inc *= 1.25;
+ else if ( $test_func($row['page_text'], $phrase) )
+ $inc *= 1.125;
+ }
+
+ if ( isset($scores[$id]) )
+ {
+ $scores[$id] = $scores[$id] + $inc;
+ }
+ else
+ {
+ $scores[$id] = $inc;
+ }
+ }
+ while ( $row = $db->fetchrow() );
+ }
+ $db->free_result();
+ }
- //
- // STAGE 4 - SELECT PAGE TEXT AND ELIMINATE NOTS
- // At this point, we have a complete list of all the possible pages. Now we want to obtain the page text, and within the same query
- // eliminate any terms that shouldn't be in there.
- //
+ //
+ // STAGE 4 - SELECT PAGE TEXT AND ELIMINATE NOTS
+ // At this point, we have a complete list of all the possible pages. Now we want to obtain the page text, and within the same query
+ // eliminate any terms that shouldn't be in there.
+ //
- // Generate master word list for the highlighter
- $word_list = array_values(array_merge($query['any'], $query['req'], $query_phrase['any'], $query_phrase['req']));
+ // Generate master word list for the highlighter
+ $word_list = array_values(array_merge($query['any'], $query['req'], $query_phrase['any'], $query_phrase['req']));
- $text_where = array();
- foreach ( $scores as $page_id => $_ )
- {
- $text_where[] = $db->escape($page_id);
- }
- $text_where = '( ' . $concat_column . ' = \'' . implode('\' OR ' . $concat_column . ' = \'', $text_where) . '\' )';
+ $text_where = array();
+ foreach ( $scores as $page_id => $_ )
+ {
+ $text_where[] = $db->escape($page_id);
+ }
+ $text_where = '( ' . $concat_column . ' = \'' . implode('\' OR ' . $concat_column . ' = \'', $text_where) . '\' )';
- if ( count($query['not']) > 0 )
- $text_where .= ' AND';
+ if ( count($query['not']) > 0 )
+ $text_where .= ' AND';
- $where_not = array();
- foreach ( $query['not'] as $term )
- {
- $term = escape_string_like($term);
- if ( !$case_sensitive )
- $term = strtolower($term);
- $where_not[] = $term;
- }
- $where_not = ( count($where_not) > 0 ) ? "$text_col NOT LIKE '%" . implode("%' AND $text_col NOT LIKE '%", $where_not) . "%'" : '';
+ $where_not = array();
+ foreach ( $query['not'] as $term )
+ {
+ $term = escape_string_like($term);
+ if ( !$case_sensitive )
+ $term = strtolower($term);
+ $where_not[] = $term;
+ }
+ $where_not = ( count($where_not) > 0 ) ? "$text_col NOT LIKE '%" . implode("%' AND $text_col NOT LIKE '%", $where_not) . "%'" : '';
- $sql = 'SELECT ' . $concat_column . ' AS id, t.page_id, t.namespace, CHAR_LENGTH(t.page_text) AS page_length, t.page_text, p.name AS page_name FROM ' . table_prefix . "page_text AS t
- LEFT JOIN " . table_prefix . "pages AS p
- ON ( p.urlname = t.page_id AND p.namespace = t.namespace )
- WHERE p.visible = 1 AND ( $text_where $where_not );";
- if ( !($q = $db->sql_unbuffered_query($sql)) )
- $db->_die('Error is in perform_search(), includes/search.php, query 3');
+ $sql = 'SELECT ' . $concat_column . ' AS id, t.page_id, t.namespace, CHAR_LENGTH(t.page_text) AS page_length, t.page_text, p.name AS page_name FROM ' . table_prefix . "page_text AS t
+ LEFT JOIN " . table_prefix . "pages AS p
+ ON ( p.urlname = t.page_id AND p.namespace = t.namespace )
+ WHERE p.visible = 1 AND ( $text_where $where_not );";
+ if ( !($q = $db->sql_unbuffered_query($sql)) )
+ $db->_die('Error is in perform_search(), includes/search.php, query 3');
- $page_data = array();
- if ( $row = $db->fetchrow() )
- {
- do
- {
- $row['page_text'] = htmlspecialchars($row['page_text']);
- $row['page_name'] = htmlspecialchars($row['page_name']);
+ $page_data = array();
+ if ( $row = $db->fetchrow() )
+ {
+ do
+ {
+ $row['page_text'] = htmlspecialchars($row['page_text']);
+ $row['page_name'] = htmlspecialchars($row['page_name']);
- // Highlight results (this is wonderfully automated)
- $row['page_text'] = highlight_and_clip_search_result($row['page_text'], $word_list, $case_sensitive);
- if ( strlen($row['page_text']) > 250 && !preg_match('/^\.\.\.(.+)\.\.\.$/', $row['page_text']) )
- {
- $row['page_text'] = substr($row['page_text'], 0, 150) . '...';
- }
- $row['page_name'] = highlight_search_result($row['page_name'], $word_list, $case_sensitive);
+ // Highlight results (this is wonderfully automated)
+ $row['page_text'] = highlight_and_clip_search_result($row['page_text'], $word_list, $case_sensitive);
+ if ( strlen($row['page_text']) > 250 && !preg_match('/^\.\.\.(.+)\.\.\.$/', $row['page_text']) )
+ {
+ $row['page_text'] = substr($row['page_text'], 0, 150) . '...';
+ }
+ $row['page_name'] = highlight_search_result($row['page_name'], $word_list, $case_sensitive);
- $page_data[$row['id']] = $row;
- }
- while ( $row = $db->fetchrow() );
- }
- $db->free_result();
-
- //
- // STAGE 5 - SPECIAL PAGE TITLE SEARCH
- // Iterate through $paths->pages and check the titles for search terms. Score accordingly.
- //
+ $page_data[$row['id']] = $row;
+ }
+ while ( $row = $db->fetchrow() );
+ }
+ $db->free_result();
+
+ //
+ // STAGE 5 - SPECIAL PAGE TITLE SEARCH
+ // Iterate through $paths->pages and check the titles for search terms. Score accordingly.
+ //
- foreach ( $paths->pages as $id => $page )
- {
- if ( $page['namespace'] != 'Special' || $page['visible'] == 0 )
- continue;
- $idstring = 'ns=' . $page['namespace'] . ';pid=' . $page['urlname_nons'];
- $any = array_values(array_unique(array_merge($query['any'], $query_phrase['any'])));
- foreach ( $any as $term )
- {
- if ( $case_sensitive )
- {
- if ( strstr($page['name'], $term) || strstr($page['urlname_nons'], $term) )
- {
- ( isset($scores[$idstring]) ) ? $scores[$idstring] = $scores[$idstring] + 1.5 : $scores[$idstring] = 1.5;
- }
- }
- else
- {
- if ( stristr($page['name'], $term) || stristr($page['urlname_nons'], $term) )
- {
- ( isset($scores[$idstring]) ) ? $scores[$idstring] = $scores[$idstring] + 1.5 : $scores[$idstring] = 1.5;
- }
- }
- }
- if ( isset($scores[$idstring]) )
- {
- $page_data[$idstring] = array(
- 'page_name' => highlight_search_result($page['name'], $word_list, $case_sensitive),
- 'page_text' => '',
- 'page_id' => $page['urlname_nons'],
- 'namespace' => $page['namespace'],
- 'score' => $scores[$idstring],
- 'page_length' => 1,
- 'page_note' => '[' . $lang->get('search_result_tag_special') . ']'
- );
- }
- }
-
- //
- // STAGE 6 - SECOND ELIMINATION ROUND
- // Iterate through the list of required terms. If a given page is not found to have the required term, eliminate it
- //
+ foreach ( $paths->pages as $id => $page )
+ {
+ if ( $page['namespace'] != 'Special' || $page['visible'] == 0 )
+ continue;
+ $idstring = 'ns=' . $page['namespace'] . ';pid=' . $page['urlname_nons'];
+ $any = array_values(array_unique(array_merge($query['any'], $query_phrase['any'])));
+ foreach ( $any as $term )
+ {
+ if ( $case_sensitive )
+ {
+ if ( strstr($page['name'], $term) || strstr($page['urlname_nons'], $term) )
+ {
+ ( isset($scores[$idstring]) ) ? $scores[$idstring] = $scores[$idstring] + 1.5 : $scores[$idstring] = 1.5;
+ }
+ }
+ else
+ {
+ if ( stristr($page['name'], $term) || stristr($page['urlname_nons'], $term) )
+ {
+ ( isset($scores[$idstring]) ) ? $scores[$idstring] = $scores[$idstring] + 1.5 : $scores[$idstring] = 1.5;
+ }
+ }
+ }
+ if ( isset($scores[$idstring]) )
+ {
+ $page_data[$idstring] = array(
+ 'page_name' => highlight_search_result($page['name'], $word_list, $case_sensitive),
+ 'page_text' => '',
+ 'page_id' => $page['urlname_nons'],
+ 'namespace' => $page['namespace'],
+ 'score' => $scores[$idstring],
+ 'page_length' => 1,
+ 'page_note' => '[' . $lang->get('search_result_tag_special') . ']'
+ );
+ }
+ }
+
+ //
+ // STAGE 6 - SECOND ELIMINATION ROUND
+ // Iterate through the list of required terms. If a given page is not found to have the required term, eliminate it
+ //
- $required = array_merge($query['req'], $query_phrase['req']);
- foreach ( $required as $term )
- {
- foreach ( $page_data as $id => $page )
- {
- if ( ( $page['namespace'] == 'Special' || ( $page['namespace'] != 'Special' && !strstr($page['page_text'], $term) ) ) && !strstr($page['page_id'], $term) && !strstr($page['page_name'], $term) )
- {
- unset($page_data[$id]);
- }
- }
- }
+ $required = array_merge($query['req'], $query_phrase['req']);
+ foreach ( $required as $term )
+ {
+ foreach ( $page_data as $id => $page )
+ {
+ if ( ( $page['namespace'] == 'Special' || ( $page['namespace'] != 'Special' && !strstr($page['page_text'], $term) ) ) && !strstr($page['page_id'], $term) && !strstr($page['page_name'], $term) )
+ {
+ unset($page_data[$id]);
+ }
+ }
+ }
- // At this point, all of our normal results are in. However, we can also allow plugins to hook into the system and score their own
- // pages and add text, etc. as necessary.
- // Plugins are COMPLETELY responsible for using the search terms and handling Boolean logic properly
+ // At this point, all of our normal results are in. However, we can also allow plugins to hook into the system and score their own
+ // pages and add text, etc. as necessary.
+ // Plugins are COMPLETELY responsible for using the search terms and handling Boolean logic properly
- inject_custom_search_results($query, $query_phrase, $scores, $page_data, $case_sensitive, $word_list);
-
- $code = $plugins->setHook('search_global_inner');
- foreach ( $code as $cmd )
- {
- eval($cmd);
- }
+ inject_custom_search_results($query, $query_phrase, $scores, $page_data, $case_sensitive, $word_list);
+
+ $code = $plugins->setHook('search_global_inner');
+ foreach ( $code as $cmd )
+ {
+ eval($cmd);
+ }
- // a marvelous debugging aid :-)
- // die('<pre>' . htmlspecialchars(print_r($page_data, true)) . '</pre>');
+ // a marvelous debugging aid :-)
+ // die('<pre>' . htmlspecialchars(print_r($page_data, true)) . '</pre>');
- //
- // STAGE 7 - HIGHLIGHT, TRIM, AND SCORE RESULTS
- // We now have the complete results of the search. We need to trim text down to show only portions of the page containing search
- // terms, highlight any search terms within the page, and sort the final results array in descending order of score.
- //
+ //
+ // STAGE 7 - HIGHLIGHT, TRIM, AND SCORE RESULTS
+ // We now have the complete results of the search. We need to trim text down to show only portions of the page containing search
+ // terms, highlight any search terms within the page, and sort the final results array in descending order of score.
+ //
- // Sort scores array
- arsort($scores);
+ // Sort scores array
+ arsort($scores);
- // Divisor for calculating relevance scores
- $divisor = ( count($query['any']) + count($query_phrase['any']) + count($query['req']) + count($query['not']) ) * 1.5;
- $divisor = max($divisor, max($scores));
-
- foreach ( $scores as $page_id => $score )
- {
- if ( !isset($page_data[$page_id]) )
- // It's possible that $scores contains a score for a page that was later eliminated because it contained a disallowed term
- continue;
+ // Divisor for calculating relevance scores
+ $divisor = ( count($query['any']) + count($query_phrase['any']) + count($query['req']) + count($query['not']) ) * 1.5;
+ $divisor = max($divisor, max($scores));
+
+ foreach ( $scores as $page_id => $score )
+ {
+ if ( !isset($page_data[$page_id]) )
+ // It's possible that $scores contains a score for a page that was later eliminated because it contained a disallowed term
+ continue;
- // Make a copy of the datum, then delete the original (it frees up a LOT of RAM)
- $datum = $page_data[$page_id];
- unset($page_data[$page_id]);
+ // Make a copy of the datum, then delete the original (it frees up a LOT of RAM)
+ $datum = $page_data[$page_id];
+ unset($page_data[$page_id]);
- // This is an internal value used for sorting - it's no longer needed.
- unset($datum['id']);
+ // This is an internal value used for sorting - it's no longer needed.
+ unset($datum['id']);
- // Calculate score
- // if ( $score > $divisor )
- // $score = $divisor;
- $datum['score'] = round($score / $divisor, 2) * 100;
-
- // Highlight the URL
- $datum['url_highlight'] = makeUrlComplete($datum['namespace'], $datum['page_id']);
- $datum['url_highlight'] = preg_replace('/\?.+$/', '', $datum['url_highlight']);
- $datum['url_highlight'] = highlight_search_result($datum['url_highlight'], $word_list, $case_sensitive);
+ // Calculate score
+ // if ( $score > $divisor )
+ // $score = $divisor;
+ $datum['score'] = round($score / $divisor, 2) * 100;
+
+ // Highlight the URL
+ $datum['url_highlight'] = makeUrlComplete($datum['namespace'], $datum['page_id']);
+ $datum['url_highlight'] = preg_replace('/\?.+$/', '', $datum['url_highlight']);
+ $datum['url_highlight'] = highlight_search_result($datum['url_highlight'], $word_list, $case_sensitive);
- // Store it in our until-now-unused results array
- $results[] = $datum;
- }
+ // Store it in our until-now-unused results array
+ $results[] = $datum;
+ }
- // Our work here is done. :-D
- return $results;
+ // Our work here is done. :-D
+ return $results;
}
/**
@@ -594,166 +594,166 @@
function parse_search_query($query, &$warnings)
{
- global $lang;
-
- $stopwords = get_stopwords();
- $ret = array(
- 'any' => array(),
- 'req' => array(),
- 'not' => array()
- );
- $warnings = array();
- $terms = array();
- $in_quote = false;
- $start_term = 0;
- $just_finished = false;
- for ( $i = 0; $i < strlen($query); $i++ )
- {
- $chr = $query{$i};
- $prev = ( $i > 0 ) ? $query{ $i - 1 } : '';
- $next = ( ( $i + 1 ) < strlen($query) ) ? $query{ $i + 1 } : '';
+ global $lang;
+
+ $stopwords = get_stopwords();
+ $ret = array(
+ 'any' => array(),
+ 'req' => array(),
+ 'not' => array()
+ );
+ $warnings = array();
+ $terms = array();
+ $in_quote = false;
+ $start_term = 0;
+ $just_finished = false;
+ for ( $i = 0; $i < strlen($query); $i++ )
+ {
+ $chr = $query{$i};
+ $prev = ( $i > 0 ) ? $query{ $i - 1 } : '';
+ $next = ( ( $i + 1 ) < strlen($query) ) ? $query{ $i + 1 } : '';
- if ( ( $chr == ' ' && !$in_quote ) || ( $i + 1 == strlen ( $query ) ) )
- {
- $len = ( $next == '' ) ? $i + 1 : $i - $start_term;
- $word = substr ( $query, $start_term, $len );
- $terms[] = $word;
- $start_term = $i + 1;
- }
+ if ( ( $chr == ' ' && !$in_quote ) || ( $i + 1 == strlen ( $query ) ) )
+ {
+ $len = ( $next == '' ) ? $i + 1 : $i - $start_term;
+ $word = substr ( $query, $start_term, $len );
+ $terms[] = $word;
+ $start_term = $i + 1;
+ }
- elseif ( $chr == '"' && $in_quote && $prev != '\\' )
- {
- $word = substr ( $query, $start_term, $i - $start_term + 1 );
- $start_pos = ( $next == ' ' ) ? $i + 2 : $i + 1;
- $in_quote = false;
- }
+ elseif ( $chr == '"' && $in_quote && $prev != '\\' )
+ {
+ $word = substr ( $query, $start_term, $i - $start_term + 1 );
+ $start_pos = ( $next == ' ' ) ? $i + 2 : $i + 1;
+ $in_quote = false;
+ }
- elseif ( $chr == '"' && !$in_quote )
- {
- $in_quote = true;
- $start_pos = $i;
- }
+ elseif ( $chr == '"' && !$in_quote )
+ {
+ $in_quote = true;
+ $start_pos = $i;
+ }
- }
+ }
- $ticker = 0;
+ $ticker = 0;
- foreach ( $terms as $element => $__unused )
- {
- $atom =& $terms[$element];
+ foreach ( $terms as $element => $__unused )
+ {
+ $atom =& $terms[$element];
- $ticker++;
+ $ticker++;
- if ( $ticker == 20 )
- {
- $warnings[] = $lang->get('search_err_query_too_many_terms');
- break;
- }
+ if ( $ticker == 20 )
+ {
+ $warnings[] = $lang->get('search_err_query_too_many_terms');
+ break;
+ }
- if ( substr ( $atom, 0, 2 ) == '+"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' )
- {
- $word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) );
- if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
- {
- $warnings[] = $lang->get('search_err_query_has_stopwords');
- $ticker--;
- continue;
- }
- if(in_array($word, $ret['req']))
- {
- $warnings[] = $lang->get('search_err_query_dup_terms');
- $ticker--;
- continue;
- }
- $ret['req'][] = $word;
- }
- elseif ( substr ( $atom, 0, 2 ) == '-"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' )
- {
- $word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) );
- if ( strlen ( $word ) < 4 )
- {
- $warnings[] = $lang->get('search_err_query_term_too_short');
- $ticker--;
- continue;
- }
- if(in_array($word, $ret['not']))
- {
- $warnings[] = $lang->get('search_err_query_dup_terms');
- $ticker--;
- continue;
- }
- $ret['not'][] = $word;
- }
- elseif ( substr ( $atom, 0, 1 ) == '+' )
- {
- $word = substr ( $atom, 1 );
- if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
- {
- $warnings[] = $lang->get('search_err_query_has_stopwords');
- $ticker--;
- continue;
- }
- if(in_array($word, $ret['req']))
- {
- $warnings[] = $lang->get('search_err_query_dup_terms');
- $ticker--;
- continue;
- }
- $ret['req'][] = $word;
- }
- elseif ( substr ( $atom, 0, 1 ) == '-' )
- {
- $word = substr ( $atom, 1 );
- if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
- {
- $warnings[] = $lang->get('search_err_query_has_stopwords');
- $ticker--;
- continue;
- }
- if(in_array($word, $ret['not']))
- {
- $warnings[] = $lang->get('search_err_query_dup_terms');
- $ticker--;
- continue;
- }
- $ret['not'][] = $word;
- }
- elseif ( substr ( $atom, 0, 1 ) == '"' && substr ( $atom, ( strlen($atom) - 1 ), 1 ) == '"' )
- {
- $word = substr ( $atom, 1, ( strlen ( $atom ) - 2 ) );
- if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
- {
- $warnings[] = $lang->get('search_err_query_has_stopwords');
- $ticker--;
- continue;
- }
- if(in_array($word, $ret['any']))
- {
- $warnings[] = $lang->get('search_err_query_dup_terms');
- $ticker--;
- continue;
- }
- $ret['any'][] = $word;
- }
- else
- {
- $word = $atom;
- if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
- {
- $warnings[] = $lang->get('search_err_query_has_stopwords');
- $ticker--;
- continue;
- }
- if(in_array($word, $ret['any']))
- {
- $warnings[] = $lang->get('search_err_query_dup_terms');
- $ticker--;
- continue;
- }
- $ret['any'][] = $word;
- }
- }
- return $ret;
+ if ( substr ( $atom, 0, 2 ) == '+"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' )
+ {
+ $word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) );
+ if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
+ {
+ $warnings[] = $lang->get('search_err_query_has_stopwords');
+ $ticker--;
+ continue;
+ }
+ if(in_array($word, $ret['req']))
+ {
+ $warnings[] = $lang->get('search_err_query_dup_terms');
+ $ticker--;
+ continue;
+ }
+ $ret['req'][] = $word;
+ }
+ elseif ( substr ( $atom, 0, 2 ) == '-"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' )
+ {
+ $word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) );
+ if ( strlen ( $word ) < 4 )
+ {
+ $warnings[] = $lang->get('search_err_query_term_too_short');
+ $ticker--;
+ continue;
+ }
+ if(in_array($word, $ret['not']))
+ {
+ $warnings[] = $lang->get('search_err_query_dup_terms');
+ $ticker--;
+ continue;
+ }
+ $ret['not'][] = $word;
+ }
+ elseif ( substr ( $atom, 0, 1 ) == '+' )
+ {
+ $word = substr ( $atom, 1 );
+ if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
+ {
+ $warnings[] = $lang->get('search_err_query_has_stopwords');
+ $ticker--;
+ continue;
+ }
+ if(in_array($word, $ret['req']))
+ {
+ $warnings[] = $lang->get('search_err_query_dup_terms');
+ $ticker--;
+ continue;
+ }
+ $ret['req'][] = $word;
+ }
+ elseif ( substr ( $atom, 0, 1 ) == '-' )
+ {
+ $word = substr ( $atom, 1 );
+ if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
+ {
+ $warnings[] = $lang->get('search_err_query_has_stopwords');
+ $ticker--;
+ continue;
+ }
+ if(in_array($word, $ret['not']))
+ {
+ $warnings[] = $lang->get('search_err_query_dup_terms');
+ $ticker--;
+ continue;
+ }
+ $ret['not'][] = $word;
+ }
+ elseif ( substr ( $atom, 0, 1 ) == '"' && substr ( $atom, ( strlen($atom) - 1 ), 1 ) == '"' )
+ {
+ $word = substr ( $atom, 1, ( strlen ( $atom ) - 2 ) );
+ if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
+ {
+ $warnings[] = $lang->get('search_err_query_has_stopwords');
+ $ticker--;
+ continue;
+ }
+ if(in_array($word, $ret['any']))
+ {
+ $warnings[] = $lang->get('search_err_query_dup_terms');
+ $ticker--;
+ continue;
+ }
+ $ret['any'][] = $word;
+ }
+ else
+ {
+ $word = $atom;
+ if ( strlen ( $word ) < 2 || in_array($word, $stopwords) )
+ {
+ $warnings[] = $lang->get('search_err_query_has_stopwords');
+ $ticker--;
+ continue;
+ }
+ if(in_array($word, $ret['any']))
+ {
+ $warnings[] = $lang->get('search_err_query_dup_terms');
+ $ticker--;
+ continue;
+ }
+ $ret['any'][] = $word;
+ }
+ }
+ return $ret;
}
/**
@@ -764,10 +764,10 @@
function escape_string_like($string)
{
- global $db, $session, $paths, $template, $plugins; // Common objects
- $string = $db->escape($string);
- $string = str_replace(array('%', '_'), array('\%', '\_'), $string);
- return $string;
+ global $db, $session, $paths, $template, $plugins; // Common objects
+ $string = $db->escape($string);
+ $string = str_replace(array('%', '_'), array('\%', '\_'), $string);
+ return $string;
}
/**
@@ -780,18 +780,18 @@
function highlight_search_result($pt, $words, $case_sensitive = false)
{
- $words2 = array();
- for ( $i = 0; $i < sizeof($words); $i++)
- {
- if(!empty($words[$i]))
- $words2[] = preg_quote($words[$i]);
- }
+ $words2 = array();
+ for ( $i = 0; $i < sizeof($words); $i++)
+ {
+ if(!empty($words[$i]))
+ $words2[] = preg_quote($words[$i]);
+ }
- $flag = ( $case_sensitive ) ? '' : 'i';
- $regex = '/(' . implode('|', str_replace('/', '\\/', $words2)) . ')/' . $flag;
- $pt = preg_replace($regex, '<highlight>\\1</highlight>', $pt);
+ $flag = ( $case_sensitive ) ? '' : 'i';
+ $regex = '/(' . implode('|', str_replace('/', '\\/', $words2)) . ')/' . $flag;
+ $pt = preg_replace($regex, '<highlight>\\1</highlight>', $pt);
- return $pt;
+ return $pt;
}
/**
@@ -805,95 +805,95 @@
function highlight_and_clip_search_result($pt, $words, $case_sensitive = false)
{
- $cut_off = false;
+ $cut_off = false;
- $space_chars = Array("\t", "\n", "\r", " ");
+ $space_chars = Array("\t", "\n", "\r", " ");
- $pt = highlight_search_result($pt, $words, $case_sensitive);
+ $pt = highlight_search_result($pt, $words, $case_sensitive);
- foreach ( $words as $word )
- {
- // Boldface searched words
- $ptlen = strlen($pt);
- for ( $i = 0; $i < $ptlen; $i++ )
- {
- $len = strlen($word);
- if ( strtolower(substr($pt, $i, $len)) == strtolower($word) )
- {
- $chunk1 = substr($pt, 0, $i);
- $chunk2 = substr($pt, $i, $len);
- $chunk3 = substr($pt, ( $i + $len ));
- $pt = $chunk1 . $chunk2 . $chunk3;
- $ptlen = strlen($pt);
- // Cut off text to 150 chars or so
- if ( !$cut_off )
- {
- $cut_off = true;
- if ( $i - 75 > 0 )
- {
- // Navigate backwards until a space character is found
- $chunk = substr($pt, 0, ( $i - 75 ));
- $final_chunk = $chunk;
- for ( $j = strlen($chunk) - 1; $j > 0; $j = $j - 1 )
- {
- if ( in_array($chunk{$j}, $space_chars) )
- {
- $final_chunk = substr($chunk, $j + 1);
- break;
- }
- }
- $mid_chunk = substr($pt, ( $i - 75 ), 75);
+ foreach ( $words as $word )
+ {
+ // Boldface searched words
+ $ptlen = strlen($pt);
+ for ( $i = 0; $i < $ptlen; $i++ )
+ {
+ $len = strlen($word);
+ if ( strtolower(substr($pt, $i, $len)) == strtolower($word) )
+ {
+ $chunk1 = substr($pt, 0, $i);
+ $chunk2 = substr($pt, $i, $len);
+ $chunk3 = substr($pt, ( $i + $len ));
+ $pt = $chunk1 . $chunk2 . $chunk3;
+ $ptlen = strlen($pt);
+ // Cut off text to 150 chars or so
+ if ( !$cut_off )
+ {
+ $cut_off = true;
+ if ( $i - 75 > 0 )
+ {
+ // Navigate backwards until a space character is found
+ $chunk = substr($pt, 0, ( $i - 75 ));
+ $final_chunk = $chunk;
+ for ( $j = strlen($chunk) - 1; $j > 0; $j = $j - 1 )
+ {
+ if ( in_array($chunk{$j}, $space_chars) )
+ {
+ $final_chunk = substr($chunk, $j + 1);
+ break;
+ }
+ }
+ $mid_chunk = substr($pt, ( $i - 75 ), 75);
- $clipped = '...' . $final_chunk . $mid_chunk . $chunk2;
+ $clipped = '...' . $final_chunk . $mid_chunk . $chunk2;
- $chunk = substr($pt, ( $i + strlen($chunk2) + 75 ));
- $final_chunk = $chunk;
- for ( $j = 0; $j < strlen($chunk); $j++ )
- {
- if ( in_array($chunk{$j}, $space_chars) )
- {
- $final_chunk = substr($chunk, 0, $j);
- break;
- }
- }
+ $chunk = substr($pt, ( $i + strlen($chunk2) + 75 ));
+ $final_chunk = $chunk;
+ for ( $j = 0; $j < strlen($chunk); $j++ )
+ {
+ if ( in_array($chunk{$j}, $space_chars) )
+ {
+ $final_chunk = substr($chunk, 0, $j);
+ break;
+ }
+ }
- $end_chunk = substr($pt, ( $i + strlen($chunk2) ), 75 );
+ $end_chunk = substr($pt, ( $i + strlen($chunk2) ), 75 );
- $clipped .= $end_chunk . $final_chunk . '...';
+ $clipped .= $end_chunk . $final_chunk . '...';
- $pt = $clipped;
- }
- else if ( strlen($pt) > 200 )
- {
- $mid_chunk = substr($pt, ( $i - 75 ), 75);
+ $pt = $clipped;
+ }
+ else if ( strlen($pt) > 200 )
+ {
+ $mid_chunk = substr($pt, ( $i - 75 ), 75);
- $clipped = $chunk1 . $chunk2;
+ $clipped = $chunk1 . $chunk2;
- $chunk = substr($pt, ( $i + strlen($chunk2) + 75 ));
- $final_chunk = $chunk;
- for ( $j = 0; $j < strlen($chunk); $j++ )
- {
- if ( in_array($chunk{$j}, $space_chars) )
- {
- $final_chunk = substr($chunk, 0, $j);
- break;
- }
- }
+ $chunk = substr($pt, ( $i + strlen($chunk2) + 75 ));
+ $final_chunk = $chunk;
+ for ( $j = 0; $j < strlen($chunk); $j++ )
+ {
+ if ( in_array($chunk{$j}, $space_chars) )
+ {
+ $final_chunk = substr($chunk, 0, $j);
+ break;
+ }
+ }
- $end_chunk = substr($pt, ( $i + strlen($chunk2) ), 75 );
+ $end_chunk = substr($pt, ( $i + strlen($chunk2) ), 75 );
- $clipped .= $end_chunk . $final_chunk . '...';
+ $clipped .= $end_chunk . $final_chunk . '...';
- $pt = $clipped;
+ $pt = $clipped;
- }
- break 2;
- }
- }
- }
- $cut_off = false;
- }
- return $pt;
+ }
+ break 2;
+ }
+ }
+ }
+ $cut_off = false;
+ }
+ return $pt;
}
/**
@@ -903,15 +903,15 @@
function get_stopwords()
{
- static $stopwords;
- if ( is_array($stopwords) )
- return $stopwords;
+ static $stopwords;
+ if ( is_array($stopwords) )
+ return $stopwords;
- $stopwords = array('I', 'a', 'about', 'an', 'are', 'as', 'at', 'be', 'by', 'com', 'de', 'en', 'for', 'from', 'how', 'in', 'is', 'it',
- 'la', 'of', 'on', 'or', 'that', 'the', 'this', 'to', 'was', 'what', 'when', 'where', 'who', 'will', 'with', 'and',
- 'the');
-
- return $stopwords;
+ $stopwords = array('I', 'a', 'about', 'an', 'are', 'as', 'at', 'be', 'by', 'com', 'de', 'en', 'for', 'from', 'how', 'in', 'is', 'it',
+ 'la', 'of', 'on', 'or', 'that', 'the', 'this', 'to', 'was', 'what', 'when', 'where', 'who', 'will', 'with', 'and',
+ 'the');
+
+ return $stopwords;
}
/**
@@ -920,182 +920,182 @@
function inject_custom_search_results(&$query, &$query_phrase, &$scores, &$page_data, &$case_sensitive, &$word_list)
{
- global $db, $session, $paths, $template, $plugins; // Common objects
- global $lang;
-
- global $search_handlers;
-
- // global functions
- $terms = array(
- 'any' => array_merge($query['any'], $query_phrase['any']),
- 'req' => array_merge($query['req'], $query_phrase['req']),
- 'not' => $query['not']
- );
-
- foreach ( $search_handlers as &$options )
- {
- $where = array('any' => array(), 'req' => array(), 'not' => array());
- $where_any =& $where['any'];
- $where_req =& $where['req'];
- $where_not =& $where['not'];
- $title_col = ( $case_sensitive ) ? $options['titlecolumn'] : ENANO_SQLFUNC_LOWERCASE . '(' . $options['titlecolumn'] . ')';
- if ( isset($options['datacolumn']) )
- $desc_col = ( $case_sensitive ) ? $options['datacolumn'] : ENANO_SQLFUNC_LOWERCASE . '(' . $options['datacolumn'] . ')';
- else
- $desc_col = "''";
- foreach ( $terms['any'] as $term )
- {
- $term = escape_string_like($term);
- if ( !$case_sensitive )
- $term = strtolower($term);
- $where_any[] = "( $title_col LIKE '%{$term}%' OR $desc_col LIKE '%{$term}%' )";
- }
- foreach ( $terms['req'] as $term )
- {
- $term = escape_string_like($term);
- if ( !$case_sensitive )
- $term = strtolower($term);
- $where_req[] = "( $title_col LIKE '%{$term}%' OR $desc_col LIKE '%{$term}%' )";
- }
- foreach ( $terms['not'] as $term )
- {
- $term = escape_string_like($term);
- if ( !$case_sensitive )
- $term = strtolower($term);
- $where_not[] = "$title_col NOT LIKE '%{$term}%' AND $desc_col NOT LIKE '%{$term}%'";
- }
- if ( empty($where_any) )
- unset($where_any, $where['any']);
- if ( empty($where_req) )
- unset($where_req, $where['req']);
- if ( empty($where_not) )
- unset($where_not, $where['not']);
-
- $where_any = '(' . implode(' OR ', $where_any) . '' . ( isset($where['req']) || isset($where['not']) ? ' OR 1 = 1' : '' ) . ')';
-
- if ( isset($where_req) )
- $where_req = implode(' AND ', $where_req);
- if ( isset($where_not) )
- $where_not = implode( 'AND ', $where_not);
-
- $where = implode(' AND ', $where);
-
- $columns = $options['titlecolumn'];
- if ( isset($options['datacolumn']) )
- $columns .= ", {$options['datacolumn']}";
- if ( isset($options['additionalcolumns']) )
- $columns .= ', ' . implode(', ', $options['additionalcolumns']);
-
- $additionalwhere = ( isset($options['additionalwhere']) ) ? $options['additionalwhere'] : '';
-
- $sql = "SELECT $columns FROM " . table_prefix . "{$options['table']} WHERE ( $where ) $additionalwhere;";
-
- if ( !($q = $db->sql_unbuffered_query($sql)) )
- {
- $db->_die('Automatically generated search query');
- }
-
- if ( $row = $db->fetchrow() )
- {
- do
- {
- $parser = $template->makeParserText($options['uniqueid']);
- $parser->assign_vars($row);
- $idstring = $parser->run();
-
- // Score this result
- foreach ( $word_list as $term )
- {
- if ( $case_sensitive )
- {
- if ( strstr($row[$options['titlecolumn']], $term) )
- {
- ( isset($scores[$idstring]) ) ? $scores[$idstring] += 1.5 : $scores[$idstring] = 1.5;
- }
- else if ( isset($options['datacolumn']) && strstr($row[$options['datacolumn']], $term) )
- {
- ( isset($scores[$idstring]) ) ? $scores[$idstring]++ : $scores[$idstring] = 1;
- }
- }
- else
- {
- if ( stristr($row[$options['titlecolumn']], $term) )
- {
- ( isset($scores[$idstring]) ) ? $scores[$idstring] += 1.5 : $scores[$idstring] = 1.5;
- }
- else if ( isset($options['datacolumn']) && stristr($row[$options['datacolumn']], $term) )
- {
- ( isset($scores[$idstring]) ) ? $scores[$idstring]++ : $scores[$idstring] = 1;
- }
- }
- }
- // Generate text...
- $text = '';
- if ( isset($options['datacolumn']) && !isset($options['formatcallback']) )
- {
- $text = highlight_and_clip_search_result(htmlspecialchars($row[$options['datacolumn']]), $word_list);
- }
- else if ( isset($options['formatcallback']) )
- {
- if ( is_callable($options['formatcallback']) )
- {
- $text = call_user_func($options['formatcallback'], $row, $word_list);
- }
- else
- {
- $parser = $template->makeParserText($options['formatcallback']);
- $parser->assign_vars($row);
- $text = $parser->run();
- }
- }
-
- // Inject result
-
- if ( isset($scores[$idstring]) )
- {
- $parser = $template->makeParserText($options['linkformat']['page_id']);
- $parser->assign_vars($row);
- $page_id = $parser->run();
-
- $parser = $template->makeParserText($options['linkformat']['namespace']);
- $parser->assign_vars($row);
- $namespace = $parser->run();
-
- $page_data[$idstring] = array(
- 'page_name' => highlight_search_result(htmlspecialchars($row[$options['titlecolumn']]), $word_list),
- 'page_text' => $text,
- 'score' => $scores[$idstring],
- 'page_id' => $page_id,
- 'namespace' => $namespace,
- );
-
- // Any additional flags that need to be added to the result?
- // The small usually-bracketed text to the left of the title
- if ( isset($options['resultnote']) )
- {
- $page_data[$idstring]['page_note'] = $options['resultnote'];
- }
- // Should we include the length?
- if ( isset($options['datacolumn']) )
- {
- $page_data[$idstring]['page_length'] = strlen($row[$options['datacolumn']]);
- }
- else
- {
- $page_data[$idstring]['page_length'] = 0;
- $page_data[$idstring]['zero_length'] = true;
- }
- // Anything to append to result links?
- if ( isset($options['linkformat']['append']) )
- {
- $page_data[$idstring]['url_append'] = $options['linkformat']['append'];
- }
- }
- }
- while ( $row = $db->fetchrow($q) );
- $db->free_result($q);
- }
- }
+ global $db, $session, $paths, $template, $plugins; // Common objects
+ global $lang;
+
+ global $search_handlers;
+
+ // global functions
+ $terms = array(
+ 'any' => array_merge($query['any'], $query_phrase['any']),
+ 'req' => array_merge($query['req'], $query_phrase['req']),
+ 'not' => $query['not']
+ );
+
+ foreach ( $search_handlers as &$options )
+ {
+ $where = array('any' => array(), 'req' => array(), 'not' => array());
+ $where_any =& $where['any'];
+ $where_req =& $where['req'];
+ $where_not =& $where['not'];
+ $title_col = ( $case_sensitive ) ? $options['titlecolumn'] : ENANO_SQLFUNC_LOWERCASE . '(' . $options['titlecolumn'] . ')';
+ if ( isset($options['datacolumn']) )
+ $desc_col = ( $case_sensitive ) ? $options['datacolumn'] : ENANO_SQLFUNC_LOWERCASE . '(' . $options['datacolumn'] . ')';
+ else
+ $desc_col = "''";
+ foreach ( $terms['any'] as $term )
+ {
+ $term = escape_string_like($term);
+ if ( !$case_sensitive )
+ $term = strtolower($term);
+ $where_any[] = "( $title_col LIKE '%{$term}%' OR $desc_col LIKE '%{$term}%' )";
+ }
+ foreach ( $terms['req'] as $term )
+ {
+ $term = escape_string_like($term);
+ if ( !$case_sensitive )
+ $term = strtolower($term);
+ $where_req[] = "( $title_col LIKE '%{$term}%' OR $desc_col LIKE '%{$term}%' )";
+ }
+ foreach ( $terms['not'] as $term )
+ {
+ $term = escape_string_like($term);
+ if ( !$case_sensitive )
+ $term = strtolower($term);
+ $where_not[] = "$title_col NOT LIKE '%{$term}%' AND $desc_col NOT LIKE '%{$term}%'";
+ }
+ if ( empty($where_any) )
+ unset($where_any, $where['any']);
+ if ( empty($where_req) )
+ unset($where_req, $where['req']);
+ if ( empty($where_not) )
+ unset($where_not, $where['not']);
+
+ $where_any = '(' . implode(' OR ', $where_any) . '' . ( isset($where['req']) || isset($where['not']) ? ' OR 1 = 1' : '' ) . ')';
+
+ if ( isset($where_req) )
+ $where_req = implode(' AND ', $where_req);
+ if ( isset($where_not) )
+ $where_not = implode( 'AND ', $where_not);
+
+ $where = implode(' AND ', $where);
+
+ $columns = $options['titlecolumn'];
+ if ( isset($options['datacolumn']) )
+ $columns .= ", {$options['datacolumn']}";
+ if ( isset($options['additionalcolumns']) )
+ $columns .= ', ' . implode(', ', $options['additionalcolumns']);
+
+ $additionalwhere = ( isset($options['additionalwhere']) ) ? $options['additionalwhere'] : '';
+
+ $sql = "SELECT $columns FROM " . table_prefix . "{$options['table']} WHERE ( $where ) $additionalwhere;";
+
+ if ( !($q = $db->sql_unbuffered_query($sql)) )
+ {
+ $db->_die('Automatically generated search query');
+ }
+
+ if ( $row = $db->fetchrow() )
+ {
+ do
+ {
+ $parser = $template->makeParserText($options['uniqueid']);
+ $parser->assign_vars($row);
+ $idstring = $parser->run();
+
+ // Score this result
+ foreach ( $word_list as $term )
+ {
+ if ( $case_sensitive )
+ {
+ if ( strstr($row[$options['titlecolumn']], $term) )
+ {
+ ( isset($scores[$idstring]) ) ? $scores[$idstring] += 1.5 : $scores[$idstring] = 1.5;
+ }
+ else if ( isset($options['datacolumn']) && strstr($row[$options['datacolumn']], $term) )
+ {
+ ( isset($scores[$idstring]) ) ? $scores[$idstring]++ : $scores[$idstring] = 1;
+ }
+ }
+ else
+ {
+ if ( stristr($row[$options['titlecolumn']], $term) )
+ {
+ ( isset($scores[$idstring]) ) ? $scores[$idstring] += 1.5 : $scores[$idstring] = 1.5;
+ }
+ else if ( isset($options['datacolumn']) && stristr($row[$options['datacolumn']], $term) )
+ {
+ ( isset($scores[$idstring]) ) ? $scores[$idstring]++ : $scores[$idstring] = 1;
+ }
+ }
+ }
+ // Generate text...
+ $text = '';
+ if ( isset($options['datacolumn']) && !isset($options['formatcallback']) )
+ {
+ $text = highlight_and_clip_search_result(htmlspecialchars($row[$options['datacolumn']]), $word_list);
+ }
+ else if ( isset($options['formatcallback']) )
+ {
+ if ( is_callable($options['formatcallback']) )
+ {
+ $text = call_user_func($options['formatcallback'], $row, $word_list);
+ }
+ else
+ {
+ $parser = $template->makeParserText($options['formatcallback']);
+ $parser->assign_vars($row);
+ $text = $parser->run();
+ }
+ }
+
+ // Inject result
+
+ if ( isset($scores[$idstring]) )
+ {
+ $parser = $template->makeParserText($options['linkformat']['page_id']);
+ $parser->assign_vars($row);
+ $page_id = $parser->run();
+
+ $parser = $template->makeParserText($options['linkformat']['namespace']);
+ $parser->assign_vars($row);
+ $namespace = $parser->run();
+
+ $page_data[$idstring] = array(
+ 'page_name' => highlight_search_result(htmlspecialchars($row[$options['titlecolumn']]), $word_list),
+ 'page_text' => $text,
+ 'score' => $scores[$idstring],
+ 'page_id' => $page_id,
+ 'namespace' => $namespace,
+ );
+
+ // Any additional flags that need to be added to the result?
+ // The small usually-bracketed text to the left of the title
+ if ( isset($options['resultnote']) )
+ {
+ $page_data[$idstring]['page_note'] = $options['resultnote'];
+ }
+ // Should we include the length?
+ if ( isset($options['datacolumn']) )
+ {
+ $page_data[$idstring]['page_length'] = strlen($row[$options['datacolumn']]);
+ }
+ else
+ {
+ $page_data[$idstring]['page_length'] = 0;
+ $page_data[$idstring]['zero_length'] = true;
+ }
+ // Anything to append to result links?
+ if ( isset($options['linkformat']['append']) )
+ {
+ $page_data[$idstring]['url_append'] = $options['linkformat']['append'];
+ }
+ }
+ }
+ while ( $row = $db->fetchrow($q) );
+ $db->free_result($q);
+ }
+ }
}
?>