author | Dan |
Mon, 01 Feb 2010 02:14:02 -0500 | |
changeset 1215 | ec7f8f6312bb |
parent 1201 | 9593e62929d1 |
child 1227 | bdac73ed481e |
permissions | -rw-r--r-- |
1 | 1 |
<?php |
2 |
||
3 |
/* |
|
4 |
* Enano - an open-source CMS capable of wiki functions, Drupal-like sidebar blocks, and everything in between |
|
1081
745200a9cc2a
Fixed some upgrade bugs; added support for choosing one's own date/time formats; rebrand as 1.1.7
Dan
parents:
953
diff
changeset
|
5 |
* Copyright (C) 2006-2009 Dan Fuhry |
1 | 6 |
* search.php - algorithm used to search pages |
7 |
* |
|
8 |
* This program is Free Software; you can redistribute and/or modify it under the terms of the GNU General Public License |
|
9 |
* as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. |
|
10 |
* |
|
11 |
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied |
|
12 |
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for details. |
|
13 |
*/ |
|
14 |
||
15 |
/** |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
16 |
* In Enano versions prior to 1.0.2, this class provided a search function that was keyword-based and allowed boolean searches. It was |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
17 |
* cut from Coblynau and replaced with perform_search(), later in this file, because of speed issues. Now mostly deprecated. The only |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
18 |
* thing remaining is the buildIndex function, which is still used by the path manager and the new search framework. |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
19 |
* |
1 | 20 |
* @package Enano |
21 |
* @subpackage Page management frontend |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
22 |
* @license GNU General Public License <http://enanocms.org/Special:GNU_General_Public_License> |
1 | 23 |
*/ |
24 |
||
25 |
class Searcher
{
  
  // Not read or written by buildIndex(); kept so code elsewhere that uses these properties keeps working.
  var $results;
  
  /**
   * Filled by buildIndex(): keys are words, values are comma-separated lists of the keys of the
   * $texts array in which the word occurs.
   * @var array
   */
  var $index;
  
  // Not read or written by buildIndex().
  var $warnings;
  
  // Not read or written by buildIndex().
  var $match_case = false;
  
  /**
   * Builds a word index from a set of texts and stores it in $this->index.
   *
   * Each text is reduced to runs of A-Z, a-z, 0-9 and apostrophes separated by single spaces. A word is
   * dropped when it is shorter than 2 or longer than 63 bytes, appears in the list returned by
   * get_stopwords(), or contains two or more consecutive apostrophes. Words are indexed with their
   * original letter case. Any previous contents of $this->index are discarded.
   *
   * @param array Texts to index. Each key identifies its text and is what gets recorded in the index.
   * @return null The result is left in $this->index (word => comma-separated list of keys).
   */
  
  function buildIndex($texts)
  {
    $this->index = array();
    $stopwords = get_stopwords();
    
    // Loop-invariant lookup table of the characters allowed to survive in indexed text. Flipping it
    // gives a constant-time isset() test instead of a linear in_array() scan per character.
    $good_chars = array_flip(enano_str_split('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\' ', 1));
    
    foreach ( $texts as $page => $text )
    {
      // Hide apostrophes behind a random, purely alphanumeric token so that the non-word replacement
      // below does not split words such as "don't"; they are restored right afterwards.
      $token = 'xxxApoS' . md5(microtime(true) . mt_rand()) . 'xxx';
      $text = str_replace("'", $token, $text);
      
      // Every run of non-word characters or underscores becomes exactly one space. The two follow-up
      // replacements that used to sit here ('#([ ]+?)#' => ' ' and '#([\']*){2,}#s' => '') never altered
      // the text - the first swapped each space for a space, the second ran while no apostrophes were
      // present - so they have been removed.
      $text = preg_replace('#([\W_]+)#i', ' ', $text);
      $text = str_replace($token, "'", $text);
      
      // Discard anything that is still outside the allowed character set.
      $kept = array();
      foreach ( enano_str_split($text, 1) as $char )
      {
        if ( isset($good_chars[$char]) )
          $kept[] = $char;
      }
      
      // Words already recorded for this page. Because every page key is visited exactly once, this is
      // all the de-duplication that is needed. It replaces a loose in_array() scan of the posting list,
      // which was quadratic and treated distinct keys such as 10 and '1e1' as the same page.
      $seen = array();
      foreach ( explode(' ', implode('', $kept)) as $word )
      {
        $length = strlen($word);
        if ( $length < 2 || $length > 63 || isset($seen[$word]) )
          continue;
        // NOTE(review): comparison with the stopword list is left loose, as before, because the shape
        // of what get_stopwords() returns is not visible from this file.
        if ( in_array($word, $stopwords) || preg_match('/[\']{2,}/', $word) )
          continue;
        
        $seen[$word] = true;
        $this->index[$word][] = $page;
      }
    }
    
    // Flatten every posting list into its comma-separated string form.
    foreach ( $this->index as $word => $pages )
    {
      $this->index[$word] = implode(',', $pages);
    }
  }
}
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
86 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
87 |
/** |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
88 |
* Searches the site for the specified string and returns an array with each value being an array filled with the following: |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
89 |
* page_id: string, self-explanatory |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
90 |
* namespace: string, self-explanatory |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
91 |
* page_length: integer, the length of the full page in bytes |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
92 |
* page_text: string, the contents of the page (trimmed to ~150 bytes if necessary) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
93 |
* score: numerical relevance score, 1-100, rounded to 2 digits and calculated based on which terms were present and which were not |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
94 |
* @param string Search query |
499
6b7fdd898ba3
Fixed some bugs with PostgreSQL and added a word_lcase column to the search_index table because collation is not working under MySQL. TODO: Trigger search index rebuild on upgrade to 1.1.4.
Dan
parents:
461
diff
changeset
|
95 |
* @param string|reference Will be filled with any warnings encountered whilst parsing the query |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
96 |
* @param bool Case sensitivity - defaults to false |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
97 |
* @param array|reference Will be filled with the parsed list of words. |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
98 |
* @return array |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
99 |
*/ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
100 |
|
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
101 |
function perform_search($query, &$warnings, $case_sensitive = false, &$word_list) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
102 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
103 |
global $db, $session, $paths, $template, $plugins; // Common objects |
335
67bd3121a12e
Replaced TinyMCE 2.x with 3.0 beta 3. Supports everything but IE. Also rewrote the editor interface completely from the ground up.
Dan
parents:
334
diff
changeset
|
104 |
global $lang; |
67bd3121a12e
Replaced TinyMCE 2.x with 3.0 beta 3. Supports everything but IE. Also rewrote the editor interface completely from the ground up.
Dan
parents:
334
diff
changeset
|
105 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
106 |
$warnings = array(); |
756
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
107 |
|
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
108 |
// |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
109 |
// STAGE 0: PARSE SEARCH QUERY |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
110 |
// Identify all terms of the query. Separate between what is required and what is not, and what should be sent through the index as |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
111 |
// opposed to straight-out LIKE-selected. |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
112 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
113 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
114 |
$query = parse_search_query($query, $warnings); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
115 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
116 |
// Segregate search terms containing spaces |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
117 |
$query_phrase = array( |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
118 |
'any' => array(), |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
119 |
'req' => array() |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
120 |
); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
121 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
122 |
foreach ( $query['any'] as $i => $_ ) |
1 | 123 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
124 |
$term =& $query['any'][$i]; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
125 |
$term = trim($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
126 |
// the indexer only indexes words a-z with apostrophes |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
127 |
if ( preg_match('/[^A-Za-z\']/', $term) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
128 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
129 |
$query_phrase['any'][] = $term; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
130 |
unset($term, $query['any'][$i]); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
131 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
132 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
133 |
unset($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
134 |
$query['any'] = array_values($query['any']); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
135 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
136 |
foreach ( $query['req'] as $i => $_ ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
137 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
138 |
$term =& $query['req'][$i]; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
139 |
$term = trim($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
140 |
if ( preg_match('/[^A-Za-z\']/', $term) ) |
1 | 141 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
142 |
$query_phrase['req'][] = $term; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
143 |
unset($term, $query['req'][$i]); |
1 | 144 |
} |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
145 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
146 |
unset($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
147 |
$query['req'] = array_values($query['req']); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
148 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
149 |
$results = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
150 |
$scores = array(); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
151 |
$ns_list = '(' . implode('|', array_keys($paths->nslist)) . ')'; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
152 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
153 |
// FIXME: Update to use FULLTEXT algo when available. |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
154 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
155 |
// Build an SQL query to load from the index table |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
156 |
if ( count($query['any']) < 1 && count($query['req']) < 1 && count($query_phrase['any']) < 1 && count($query_phrase['req']) < 1 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
157 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
158 |
// This is both because of technical restrictions and devastation that would occur on shared servers/large sites. |
391
85f91037cd4f
Localization is FINISHED, DAMN IT HELLAH YEAH! OVER WITH! Man, it feels to get that off my chest. Release is in under 48 hours, folks. And we're ready for it.
Dan
parents:
387
diff
changeset
|
159 |
$warnings[] = $lang->get('search_err_query_no_positive'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
160 |
return array(); |
1 | 161 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
162 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
163 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
164 |
// STAGE 1 |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
165 |
// Get all possible result pages from the search index. Tally which pages have the most words, and later sort them by boolean relevance |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
166 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
167 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
168 |
// Skip this if no indexable words are included |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
169 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
170 |
if ( count($query['any']) > 0 || count($query['req']) > 0 ) |
1 | 171 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
172 |
$where_any = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
173 |
foreach ( $query['any'] as $term ) |
1 | 174 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
175 |
$term = escape_string_like($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
176 |
if ( !$case_sensitive ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
177 |
$term = strtolower($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
178 |
$where_any[] = $term; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
179 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
180 |
foreach ( $query['req'] as $term ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
181 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
182 |
$term = escape_string_like($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
183 |
if ( !$case_sensitive ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
184 |
$term = strtolower($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
185 |
$where_any[] = $term; |
1 | 186 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
187 |
|
499
6b7fdd898ba3
Fixed some bugs with PostgreSQL and added a word_lcase column to the search_index table because collation is not working under MySQL. TODO: Trigger search index rebuild on upgrade to 1.1.4.
Dan
parents:
461
diff
changeset
|
188 |
$col_word = ( $case_sensitive ) ? 'word' : 'word_lcase'; |
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
189 |
$where_any_str = ( count($where_any) > 0 ) ? '( ' . $col_word . ' LIKE \'%' . implode('%\' OR ' . $col_word . ' LIKE \'%', $where_any) . '%\' )' : ''; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
190 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
191 |
// generate query |
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
192 |
$sql = "SELECT word, page_names FROM " . table_prefix . "search_index WHERE {$where_any_str}"; |
945
c19242d13a49
Removed "@" from all call_user_func() calls to make debugging special pages and such possible
Dan
parents:
832
diff
changeset
|
193 |
if ( !($q = $db->sql_query($sql)) ) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
194 |
$db->_die('Error is in perform_search(), includes/search.php, query 1'); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
195 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
196 |
$word_tracking = array(); |
945
c19242d13a49
Removed "@" from all call_user_func() calls to make debugging special pages and such possible
Dan
parents:
832
diff
changeset
|
197 |
if ( $row = $db->fetchrow($q) ) |
1 | 198 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
199 |
do |
1 | 200 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
201 |
// get page list |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
202 |
$pages =& $row['page_names']; |
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
203 |
|
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
204 |
// Find page IDs that contain commas |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
205 |
// This should never happen because commas are escaped by sanitize_page_id(). Nevertheless for compatibility with older |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
206 |
// databases, and to alleviate the concerns of hackers, we'll accommodate page IDs with commas here by checking for |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
207 |
// IDs that don't match the pattern for stringified page ID + namespace. If it doesn't match, that means it's a continuation |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
208 |
// of the previous ID and should be concatenated to the previous entry. |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
209 |
$matches = strpos($pages, ',') ? explode(',', $pages) : array($pages); |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
210 |
$prev = false; |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
211 |
foreach ( $matches as $i => $_ ) |
1 | 212 |
{ |
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
213 |
$match =& $matches[$i]; |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
214 |
if ( !preg_match("/^ns=$ns_list;pid=(.+)$/", $match) && $prev ) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
215 |
{ |
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
216 |
$matches[$prev] .= ',' . $match; |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
217 |
unset($match, $matches[$i]); |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
218 |
continue; |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
219 |
} |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
220 |
$prev = $i; |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
221 |
} |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
222 |
unset($match); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
223 |
|
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
224 |
// Iterate through each of the results, assigning scores based on how many times the page has shown up. |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
225 |
// This works because this phase of the search is strongly word-based, not page-based. If a page shows up |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
226 |
// multiple times while fetching the result rows from the search_index table, it simply means that page |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
227 |
// contains more than one of the terms the user searched for. |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
228 |
|
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
229 |
foreach ( $matches as $match ) |
1 | 230 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
231 |
$word_cs = (( $case_sensitive ) ? $row['word'] : strtolower($row['word'])); |
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
232 |
if ( isset($word_tracking[$match]) && in_array($word_cs, $word_tracking[$match]) ) |
1 | 233 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
234 |
continue; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
235 |
} |
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
236 |
if ( isset($word_tracking[$match]) ) |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
237 |
{ |
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
238 |
if ( isset($word_tracking[$match]) ) |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
239 |
{ |
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
240 |
$word_tracking[$match][] = $word_cs; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
241 |
} |
1 | 242 |
} |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
243 |
else |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
244 |
{ |
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
245 |
$word_tracking[$match] = array($word_cs); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
246 |
} |
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
247 |
|
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
248 |
// echo '<pre>' . print_r($word_tracking, true) . '</pre>'; |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
249 |
|
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
250 |
$inc = 1; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
251 |
|
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
252 |
// Is this search term present in the page's title? If so, give extra points |
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
253 |
preg_match("/^ns=$ns_list;pid=(.+)$/", $match, $piecesparts); |
945
c19242d13a49
Removed "@" from all call_user_func() calls to make debugging special pages and such possible
Dan
parents:
832
diff
changeset
|
254 |
$title = get_page_title_ns($piecesparts[2], $piecesparts[1]); |
c19242d13a49
Removed "@" from all call_user_func() calls to make debugging special pages and such possible
Dan
parents:
832
diff
changeset
|
255 |
|
c19242d13a49
Removed "@" from all call_user_func() calls to make debugging special pages and such possible
Dan
parents:
832
diff
changeset
|
256 |
$test_func = ( $case_sensitive ) ? 'strstr' : 'stristr'; |
c19242d13a49
Removed "@" from all call_user_func() calls to make debugging special pages and such possible
Dan
parents:
832
diff
changeset
|
257 |
if ( $test_func($title, $row['word']) || $test_func($piecesparts[2], $row['word']) ) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
258 |
{ |
945
c19242d13a49
Removed "@" from all call_user_func() calls to make debugging special pages and such possible
Dan
parents:
832
diff
changeset
|
259 |
$inc = 1.5; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
260 |
} |
945
c19242d13a49
Removed "@" from all call_user_func() calls to make debugging special pages and such possible
Dan
parents:
832
diff
changeset
|
261 |
|
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
262 |
// increase points if 2 or more words match a phrase in the title |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
263 |
for ( $i = 0; $i < count($where_any) - 1; $i++ ) |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
264 |
{ |
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
265 |
$phrase = "{$where_any[$i]} {$where_any[$i + 1]}"; |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
266 |
if ( $test_func($title, $phrase) ) |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
267 |
{ |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
268 |
$inc *= 1.25; |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
269 |
} |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
270 |
} |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
271 |
|
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
272 |
// Deduct points if there are few similarities between the words |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
273 |
$lev_array = array(); |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
274 |
foreach ( $where_any as $qword ) |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
275 |
{ |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
276 |
if ( strstr($word_cs, $qword) ) |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
277 |
$lev_array[ $qword ] = levenshtein($qword, $word_cs); |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
278 |
} |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
279 |
if ( min($lev_array) > 3 ) |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
280 |
{ |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
281 |
$inc /= array_sum($lev_array) / count($lev_array); |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
282 |
} |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
283 |
|
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
284 |
if ( isset($scores[$match]) ) |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
285 |
{ |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
286 |
$scores[$match] = $scores[$match] + $inc; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
287 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
288 |
else |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
289 |
{ |
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
290 |
$scores[$match] = $inc; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
291 |
} |
1 | 292 |
} |
293 |
} |
|
945
c19242d13a49
Removed "@" from all call_user_func() calls to make debugging special pages and such possible
Dan
parents:
832
diff
changeset
|
294 |
while ( $row = $db->fetchrow($q) ); |
1 | 295 |
} |
945
c19242d13a49
Removed "@" from all call_user_func() calls to make debugging special pages and such possible
Dan
parents:
832
diff
changeset
|
296 |
$db->free_result($q); |
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
297 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
298 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
299 |
// STAGE 2: FIRST ELIMINATION ROUND |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
300 |
// Iterate through the list of required terms. If a given page is not found to have the required term, eliminate it |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
301 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
302 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
303 |
foreach ( $query['req'] as $term ) |
1 | 304 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
305 |
foreach ( $word_tracking as $i => $page ) |
1 | 306 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
307 |
if ( !in_array($term, $page) ) |
1 | 308 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
309 |
unset($word_tracking[$i], $scores[$i]); |
1 | 310 |
} |
311 |
} |
|
312 |
} |
|
313 |
} |
|
945
c19242d13a49
Removed "@" from all call_user_func() calls to make debugging special pages and such possible
Dan
parents:
832
diff
changeset
|
314 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
315 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
316 |
// STAGE 3: PHRASE SEARCHING |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
317 |
// Use LIKE to find pages with specified phrases. We can do a super-picky single query without another elimination round because |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
318 |
// at this stage we can search the full page_text column instead of relying on a word list. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
319 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
320 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
321 |
// We can skip this stage if none of these special terms apply |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
322 |
|
320
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
323 |
$text_col = ( $case_sensitive ) ? 'page_text' : ENANO_SQLFUNC_LOWERCASE . '(page_text)'; |
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
324 |
$name_col = ( $case_sensitive ) ? 'name' : ENANO_SQLFUNC_LOWERCASE . '(name)'; |
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
325 |
$text_col_join = ( $case_sensitive ) ? 't.page_text' : ENANO_SQLFUNC_LOWERCASE . '(t.page_text)'; |
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
326 |
$name_col_join = ( $case_sensitive ) ? 'p.name' : ENANO_SQLFUNC_LOWERCASE . '(p.name)'; |
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
327 |
|
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
328 |
$concat_column = ( ENANO_DBLAYER == 'MYSQL' ) ? |
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
329 |
'CONCAT(\'ns=\',t.namespace,\';pid=\',t.page_id)' : |
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
330 |
"'ns=' || t.namespace || ';pid=' || t.page_id"; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
331 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
332 |
if ( count($query_phrase['any']) > 0 || count($query_phrase['req']) > 0 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
333 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
334 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
335 |
$where_any = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
336 |
foreach ( $query_phrase['any'] as $term ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
337 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
338 |
$term = escape_string_like($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
339 |
if ( !$case_sensitive ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
340 |
$term = strtolower($term); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
341 |
$where_any[] = "( $text_col LIKE '%$term%' OR $name_col LIKE '%$term%' )"; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
342 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
343 |
|
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
344 |
$where_any = ( count($where_any) > 0 ) ? implode(" OR\n ", $where_any) : ''; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
345 |
|
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
346 |
// Also do required terms, but use AND to ensure that all required terms are included |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
347 |
$where_req = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
348 |
foreach ( $query_phrase['req'] as $term ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
349 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
350 |
$term = escape_string_like($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
351 |
if ( !$case_sensitive ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
352 |
$term = strtolower($term); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
353 |
$where_req[] = "( $text_col LIKE '%$term%' OR $name_col LIKE '%$term%' )"; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
354 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
355 |
$and_clause = ( $where_any != '' ) ? 'AND ' : ''; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
356 |
$where_req = ( count($where_req) > 0 ) ? "{$and_clause}" . implode(" AND\n ", $where_req) : ''; |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
357 |
|
1194 | 358 |
$sql = 'SELECT ' . $concat_column . ' AS id, p.name, t.page_text FROM ' . table_prefix . "page_text AS t\n" |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
359 |
. " LEFT JOIN " . table_prefix . "pages AS p\n" |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
360 |
. " ON ( p.urlname = t.page_id AND p.namespace = t.namespace )\n" |
567
7f196509bf78
Actually made the "prevent search indexing" switch work. Needs to be merged with stable.
Dan
parents:
536
diff
changeset
|
361 |
. " WHERE p.visible = 1 AND (\n $where_any\n $where_req\n );"; |
1194 | 362 |
if ( !($q = $db->sql_query($sql)) ) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
363 |
$db->_die('Error is in perform_search(), includes/search.php, query 2. Parsed query dump follows:<pre>(indexable) ' . htmlspecialchars(print_r($query, true)) . '(non-indexable) ' . htmlspecialchars(print_r($query_phrase, true)) . '</pre>'); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
364 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
365 |
if ( $row = $db->fetchrow() ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
366 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
367 |
do |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
368 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
369 |
$id =& $row['id']; |
1194 | 370 |
$inc = 0.0; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
371 |
|
1194 | 372 |
$title = $row['name']; |
373 |
$test_func = ( $case_sensitive ) ? 'strstr' : 'stristr'; |
|
374 |
||
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
375 |
// Is this search term present in the page's title? If so, give extra points |
1194 | 376 |
$word_list = array_merge($query_phrase['any'], $query_phrase['req']); |
377 |
foreach ( $word_list as $word ) |
|
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
378 |
{ |
1194 | 379 |
if ( $test_func($title, $word) ) |
380 |
$inc += 1.5; |
|
381 |
else if ( $test_func($row['page_text'], $word) ) |
|
382 |
$inc += 1.0; |
|
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
383 |
} |
953
323c4cd1aa37
Made some more changes to the way namespaces are handled, for optimization purposes. This is a bit of a structural reorganization: $paths->pages is obsoleted in its entirety; calculating page existence and metadata is now the job of the Namespace_* backend class. There are many things in PageProcessor that should be reorganized, and page actions in general should really be rethought. This is probably the beginning of a long process that will be taking place over the course of the betas.
Dan
parents:
945
diff
changeset
|
384 |
|
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
385 |
// increase points if two consecutive search terms appear together as a phrase in the title (or, failing that, in the page text) |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
386 |
for ( $i = 0; $i < count($word_list) - 1; $i++ ) |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
387 |
{ |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
388 |
$phrase = "{$word_list[$i]} {$word_list[$i + 1]}"; |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
389 |
if ( $test_func($title, $phrase) ) |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
390 |
$inc *= 1.25; |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
391 |
else if ( $test_func($row['page_text'], $phrase) ) |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
392 |
$inc *= 1.125; |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
393 |
} |
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
394 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
395 |
if ( isset($scores[$id]) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
396 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
397 |
$scores[$id] = $scores[$id] + $inc; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
398 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
399 |
else |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
400 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
401 |
$scores[$id] = $inc; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
402 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
403 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
404 |
while ( $row = $db->fetchrow() ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
405 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
406 |
$db->free_result(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
407 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
408 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
409 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
410 |
// STAGE 4 - SELECT PAGE TEXT AND ELIMINATE NOTS |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
411 |
// At this point, we have a complete list of all the possible pages. Now we want to obtain the page text, and within the same query |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
412 |
// eliminate any pages containing terms that shouldn't be in there. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
413 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
414 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
415 |
// Generate master word list for the highlighter |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
416 |
$word_list = array_values(array_merge($query['any'], $query['req'], $query_phrase['any'], $query_phrase['req'])); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
417 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
418 |
$text_where = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
419 |
foreach ( $scores as $page_id => $_ ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
420 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
421 |
$text_where[] = $db->escape($page_id); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
422 |
} |
320
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
423 |
$text_where = '( ' . $concat_column . ' = \'' . implode('\' OR ' . $concat_column . ' = \'', $text_where) . '\' )'; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
424 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
425 |
if ( count($query['not']) > 0 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
426 |
$text_where .= ' AND'; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
427 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
428 |
$where_not = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
429 |
foreach ( $query['not'] as $term ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
430 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
431 |
$term = escape_string_like($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
432 |
if ( !$case_sensitive ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
433 |
$term = strtolower($term); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
434 |
$where_not[] = $term; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
435 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
436 |
$where_not = ( count($where_not) > 0 ) ? "$text_col NOT LIKE '%" . implode("%' AND $text_col NOT LIKE '%", $where_not) . "%'" : ''; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
437 |
|
320
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
438 |
$sql = 'SELECT ' . $concat_column . ' AS id, t.page_id, t.namespace, CHAR_LENGTH(t.page_text) AS page_length, t.page_text, p.name AS page_name FROM ' . table_prefix . "page_text AS t |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
439 |
LEFT JOIN " . table_prefix . "pages AS p |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
440 |
ON ( p.urlname = t.page_id AND p.namespace = t.namespace ) |
567
7f196509bf78
Actually made the "prevent search indexing" switch work. Needs to be merged with stable.
Dan
parents:
536
diff
changeset
|
441 |
WHERE p.visible = 1 AND ( $text_where $where_not );"; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
442 |
if ( !($q = $db->sql_unbuffered_query($sql)) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
443 |
$db->_die('Error is in perform_search(), includes/search.php, query 3'); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
444 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
445 |
$page_data = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
446 |
if ( $row = $db->fetchrow() ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
447 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
448 |
do |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
449 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
450 |
$row['page_text'] = htmlspecialchars($row['page_text']); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
451 |
$row['page_name'] = htmlspecialchars($row['page_name']); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
452 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
453 |
// Highlight results (this is wonderfully automated) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
454 |
$row['page_text'] = highlight_and_clip_search_result($row['page_text'], $word_list, $case_sensitive); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
455 |
if ( strlen($row['page_text']) > 250 && !preg_match('/^\.\.\.(.+)\.\.\.$/', $row['page_text']) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
456 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
457 |
$row['page_text'] = substr($row['page_text'], 0, 150) . '...'; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
458 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
459 |
$row['page_name'] = highlight_search_result($row['page_name'], $word_list, $case_sensitive); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
460 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
461 |
$page_data[$row['id']] = $row; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
462 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
463 |
while ( $row = $db->fetchrow() ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
464 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
465 |
$db->free_result(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
466 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
467 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
468 |
// STAGE 5 - SPECIAL PAGE TITLE SEARCH |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
469 |
// Iterate through $paths->pages and check the titles for search terms. Score accordingly. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
470 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
471 |
|
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
472 |
foreach ( $paths->pages as $id => $page ) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
473 |
{ |
567
7f196509bf78
Actually made the "prevent search indexing" switch work. Needs to be merged with stable.
Dan
parents:
536
diff
changeset
|
474 |
if ( $page['namespace'] != 'Special' || $page['visible'] == 0 ) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
475 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
476 |
$idstring = 'ns=' . $page['namespace'] . ';pid=' . $page['urlname_nons']; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
477 |
$any = array_values(array_unique(array_merge($query['any'], $query_phrase['any']))); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
478 |
foreach ( $any as $term ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
479 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
480 |
if ( $case_sensitive ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
481 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
482 |
if ( strstr($page['name'], $term) || strstr($page['urlname_nons'], $term) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
483 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
484 |
( isset($scores[$idstring]) ) ? $scores[$idstring] = $scores[$idstring] + 1.5 : $scores[$idstring] = 1.5; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
485 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
486 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
487 |
else |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
488 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
489 |
if ( stristr($page['name'], $term) || stristr($page['urlname_nons'], $term) ) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
490 |
{ |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
491 |
( isset($scores[$idstring]) ) ? $scores[$idstring] = $scores[$idstring] + 1.5 : $scores[$idstring] = 1.5; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
492 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
493 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
494 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
495 |
if ( isset($scores[$idstring]) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
496 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
497 |
$page_data[$idstring] = array( |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
498 |
'page_name' => highlight_search_result($page['name'], $word_list, $case_sensitive), |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
499 |
'page_text' => '', |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
500 |
'page_id' => $page['urlname_nons'], |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
501 |
'namespace' => $page['namespace'], |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
502 |
'score' => $scores[$idstring], |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
503 |
'page_length' => 1, |
335
67bd3121a12e
Replaced TinyMCE 2.x with 3.0 beta 3. Supports everything but IE. Also rewrote the editor interface completely from the ground up.
Dan
parents:
334
diff
changeset
|
504 |
'page_note' => '[' . $lang->get('search_result_tag_special') . ']' |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
505 |
); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
506 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
507 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
508 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
509 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
510 |
// STAGE 6 - SECOND ELIMINATION ROUND |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
511 |
// Iterate through the list of required terms. If a given page does not contain a required term in its text (non-Special pages only), page ID, or title, eliminate it. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
512 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
513 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
514 |
$required = array_merge($query['req'], $query_phrase['req']); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
515 |
foreach ( $required as $term ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
516 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
517 |
foreach ( $page_data as $id => $page ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
518 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
519 |
if ( ( $page['namespace'] == 'Special' || ( $page['namespace'] != 'Special' && !strstr($page['page_text'], $term) ) ) && !strstr($page['page_id'], $term) && !strstr($page['page_name'], $term) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
520 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
521 |
unset($page_data[$id]); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
522 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
523 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
524 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
525 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
526 |
// At this point, all of our normal results are in. However, we can also allow plugins to hook into the system and score their own |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
527 |
// pages and add text, etc. as necessary. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
528 |
// Plugins are COMPLETELY responsible for using the search terms and handling Boolean logic properly |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
529 |
|
756
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
530 |
inject_custom_search_results($query, $query_phrase, $scores, $page_data, $case_sensitive, $word_list); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
531 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
532 |
$code = $plugins->setHook('search_global_inner'); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
533 |
foreach ( $code as $cmd ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
534 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
535 |
eval($cmd); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
536 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
537 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
538 |
// a marvelous debugging aid :-) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
539 |
// die('<pre>' . htmlspecialchars(print_r($page_data, true)) . '</pre>'); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
540 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
541 |
// |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
542 |
// STAGE 7 - HIGHLIGHT, TRIM, AND SCORE RESULTS |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
543 |
// We now have the complete results of the search. We need to trim text down to show only portions of the page containing search |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
544 |
// terms, highlight any search terms within the page, and sort the final results array in descending order of score. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
545 |
// |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
546 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
547 |
// Sort scores array |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
548 |
arsort($scores); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
549 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
550 |
// Divisor for calculating relevance scores |
320
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
551 |
$divisor = ( count($query['any']) + count($query_phrase['any']) + count($query['req']) + count($query['not']) ) * 1.5; |
1201
9593e62929d1
Search: some refinements to algorithm, introduced score promotion for phrase matches and Levenshtein distance based score manipulation
Dan
parents:
1194
diff
changeset
|
552 |
$divisor = max($divisor, max($scores)); |
461 | 553 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
554 |
foreach ( $scores as $page_id => $score ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
555 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
556 |
if ( !isset($page_data[$page_id]) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
557 |
// It's possible that $scores contains a score for a page that was later eliminated because it contained a disallowed term |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
558 |
continue; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
559 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
560 |
// Make a copy of the datum, then delete the original (it frees up a LOT of RAM) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
561 |
$datum = $page_data[$page_id]; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
562 |
unset($page_data[$page_id]); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
563 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
564 |
// This is an internal value used for sorting - it's no longer needed. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
565 |
unset($datum['id']); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
566 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
567 |
// Calculate score |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
568 |
// if ( $score > $divisor ) |
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
569 |
// $score = $divisor; |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
570 |
$datum['score'] = round($score / $divisor, 2) * 100; |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
571 |
|
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
572 |
// Highlight the URL |
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
573 |
$datum['url_highlight'] = makeUrlComplete($datum['namespace'], $datum['page_id']); |
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
574 |
$datum['url_highlight'] = preg_replace('/\?.+$/', '', $datum['url_highlight']); |
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
575 |
$datum['url_highlight'] = highlight_search_result($datum['url_highlight'], $word_list, $case_sensitive); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
576 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
577 |
// Store it in our until-now-unused results array |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
578 |
$results[] = $datum; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
579 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
580 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
581 |
// Our work here is done. :-D |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
582 |
return $results; |
1 | 583 |
} |
584 |
||
585 |
/** |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
586 |
* Parses a search query into an associative array. The resultant array will contain the following keys, each of which maps to an array of terms: |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
587 |
* any: Search terms that can optionally be present |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
588 |
* req: Search terms that must be present |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
589 |
* not: Search terms that must not be present (pages containing any of them are excluded from the results) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
590 |
* @param string Search query |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
591 |
* @param array Will be filled with parser warnings, such as query too short, words too short, etc. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
592 |
* @return array |
1 | 593 |
*/ |
594 |
||
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
595 |
function parse_search_query($query, &$warnings) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
596 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
597 |
global $lang; |
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
598 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
599 |
$stopwords = get_stopwords(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
600 |
$ret = array( |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
601 |
'any' => array(), |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
602 |
'req' => array(), |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
603 |
'not' => array() |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
604 |
); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
605 |
$warnings = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
606 |
$terms = array(); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
607 |
$in_quote = false; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
608 |
$start_term = 0; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
609 |
$just_finished = false; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
610 |
for ( $i = 0; $i < strlen($query); $i++ ) |
1 | 611 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
612 |
$chr = $query{$i}; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
613 |
$prev = ( $i > 0 ) ? $query{ $i - 1 } : ''; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
614 |
$next = ( ( $i + 1 ) < strlen($query) ) ? $query{ $i + 1 } : ''; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
615 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
616 |
if ( ( $chr == ' ' && !$in_quote ) || ( $i + 1 == strlen ( $query ) ) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
617 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
618 |
$len = ( $next == '' ) ? $i + 1 : $i - $start_term; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
619 |
$word = substr ( $query, $start_term, $len ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
620 |
$terms[] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
621 |
$start_term = $i + 1; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
622 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
623 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
624 |
elseif ( $chr == '"' && $in_quote && $prev != '\\' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
625 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
626 |
$word = substr ( $query, $start_term, $i - $start_term + 1 ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
627 |
$start_pos = ( $next == ' ' ) ? $i + 2 : $i + 1; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
628 |
$in_quote = false; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
629 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
630 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
631 |
elseif ( $chr == '"' && !$in_quote ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
632 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
633 |
$in_quote = true; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
634 |
$start_pos = $i; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
635 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
636 |
|
1 | 637 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
638 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
639 |
$ticker = 0; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
640 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
641 |
foreach ( $terms as $element => $__unused ) |
1 | 642 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
643 |
$atom =& $terms[$element]; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
644 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
645 |
$ticker++; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
646 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
647 |
if ( $ticker == 20 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
648 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
649 |
$warnings[] = $lang->get('search_err_query_too_many_terms'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
650 |
break; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
651 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
652 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
653 |
if ( substr ( $atom, 0, 2 ) == '+"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
654 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
655 |
$word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
656 |
if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
657 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
658 |
$warnings[] = $lang->get('search_err_query_has_stopwords'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
659 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
660 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
661 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
662 |
if(in_array($word, $ret['req'])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
663 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
664 |
$warnings[] = $lang->get('search_err_query_dup_terms'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
665 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
666 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
667 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
668 |
$ret['req'][] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
669 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
670 |
elseif ( substr ( $atom, 0, 2 ) == '-"' && substr ( $atom, ( strlen ( $atom ) - 1 ), 1 ) == '"' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
671 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
672 |
$word = substr ( $atom, 2, ( strlen( $atom ) - 3 ) ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
673 |
if ( strlen ( $word ) < 4 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
674 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
675 |
$warnings[] = $lang->get('search_err_query_term_too_short'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
676 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
677 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
678 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
679 |
if(in_array($word, $ret['not'])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
680 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
681 |
$warnings[] = $lang->get('search_err_query_dup_terms'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
682 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
683 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
684 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
685 |
$ret['not'][] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
686 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
687 |
elseif ( substr ( $atom, 0, 1 ) == '+' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
688 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
689 |
$word = substr ( $atom, 1 ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
690 |
if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
691 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
692 |
$warnings[] = $lang->get('search_err_query_has_stopwords'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
693 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
694 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
695 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
696 |
if(in_array($word, $ret['req'])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
697 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
698 |
$warnings[] = $lang->get('search_err_query_dup_terms'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
699 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
700 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
701 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
702 |
$ret['req'][] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
703 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
704 |
elseif ( substr ( $atom, 0, 1 ) == '-' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
705 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
706 |
$word = substr ( $atom, 1 ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
707 |
if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
708 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
709 |
$warnings[] = $lang->get('search_err_query_has_stopwords'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
710 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
711 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
712 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
713 |
if(in_array($word, $ret['not'])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
714 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
715 |
$warnings[] = $lang->get('search_err_query_dup_terms'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
716 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
717 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
718 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
719 |
$ret['not'][] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
720 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
721 |
elseif ( substr ( $atom, 0, 1 ) == '"' && substr ( $atom, ( strlen($atom) - 1 ), 1 ) == '"' ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
722 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
723 |
$word = substr ( $atom, 1, ( strlen ( $atom ) - 2 ) ); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
724 |
if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
725 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
726 |
$warnings[] = $lang->get('search_err_query_has_stopwords'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
727 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
728 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
729 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
730 |
if(in_array($word, $ret['any'])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
731 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
732 |
$warnings[] = $lang->get('search_err_query_dup_terms'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
733 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
734 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
735 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
736 |
$ret['any'][] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
737 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
738 |
else |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
739 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
740 |
$word = $atom; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
741 |
if ( strlen ( $word ) < 2 || in_array($word, $stopwords) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
742 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
743 |
$warnings[] = $lang->get('search_err_query_has_stopwords'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
744 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
745 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
746 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
747 |
if(in_array($word, $ret['any'])) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
748 |
{ |
334
c72b545f1304
More localization work. Resolved major issue with JSON parser not parsing files over ~50KB. Switched JSON parser to the one from the Zend Framework (BSD licensed). Forced to split enano.json into five different files.
Dan
parents:
322
diff
changeset
|
749 |
$warnings[] = $lang->get('search_err_query_dup_terms'); |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
750 |
$ticker--; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
751 |
continue; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
752 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
753 |
$ret['any'][] = $word; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
754 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
755 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
756 |
return $ret; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
757 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
758 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
759 |
/**
 * Escapes a string so it can be embedded in a quoted SQL LIKE pattern and matched literally.
 *
 * Three steps are applied, in this order:
 *  1. Backslashes are doubled, so a literal backslash survives as a literal in the LIKE pattern.
 *  2. The result is passed through the database layer's escape() method for string-literal safety.
 *  3. The LIKE wildcards % and _ are prefixed with a backslash so they lose their wildcard meaning.
 *
 * The caller remains responsible for the surrounding quotes and for adding any intentional wildcards.
 * @param string Raw text that should be matched literally
 * @return string Escaped text
 */

function escape_string_like($string)
{
  global $db; // Only the database object is needed here; its escape() method is the sole member used

  // Backslash is the default LIKE escape character in MySQL and PostgreSQL, so a literal one must be doubled at the
  // pattern level before string-literal escaping is applied. Without this, "a\b" is treated as "a" + escaped "b".
  $string = str_replace('\\', '\\\\', $string);
  $string = $db->escape($string);
  // Applied after escape() so that the backslashes added here are not processed by it (presumably escape() doubles
  // backslashes, as mysql_real_escape_string() does - verify against the active database driver).
  $string = str_replace(array('%', '_'), array('\%', '\_'), $string);
  return $string;
}
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
772 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
773 |
/**
 * Wraps <highlight></highlight> tags around every occurrence of the given words in the text. Does not perform any clipping.
 * @param string Text to process
 * @param array Word list; empty strings and non-scalar entries are ignored
 * @param bool If true, searches case-sensitively when highlighting words
 * @return string The text with each match wrapped in highlight tags, or the text unchanged if there is nothing to highlight
 */

function highlight_search_result($pt, $words, $case_sensitive = false)
{
  if ( !is_array($words) )
    return $pt;

  // Build one regex alternative per usable word. foreach is used (rather than a 0..n-1 index loop) so that
  // word lists with gaps in their keys are still fully processed.
  $alternatives = array();
  foreach ( $words as $word )
  {
    if ( !is_scalar($word) )
      continue;

    // Compare against '' explicitly: empty() would also throw away the valid search term "0".
    $word = (string) $word;
    if ( $word === '' )
      continue;

    // Passing the delimiter makes preg_quote() escape "/" as well as the regex metacharacters.
    $alternatives[] = preg_quote($word, '/');
  }

  // With no alternatives the pattern would be /()/, which matches the empty string at every position and
  // would insert empty highlight tags throughout the text.
  if ( empty($alternatives) )
    return $pt;

  $flag = ( $case_sensitive ) ? '' : 'i';
  $regex = '/(' . implode('|', $alternatives) . ')/' . $flag;
  $highlighted = preg_replace($regex, '<highlight>\\1</highlight>', $pt);

  // preg_replace() returns null on a PCRE error; fall back to the unhighlighted text in that case.
  return ( $highlighted === null ) ? $pt : $highlighted;
}
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
796 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
797 |
/** |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
798 |
* Wraps <highlight></highlight> tags around all words in both the specified array and the specified text and clips the text to |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
799 |
* an appropriate length. |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
800 |
* @param string Text to process |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
801 |
* @param array Word list |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
802 |
* @param bool If true, searches case-sensitively when highlighting words |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
803 |
* @return string |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
804 |
*/ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
805 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
806 |
function highlight_and_clip_search_result($pt, $words, $case_sensitive = false) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
807 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
808 |
$cut_off = false; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
809 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
810 |
$space_chars = Array("\t", "\n", "\r", " "); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
811 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
812 |
$pt = highlight_search_result($pt, $words, $case_sensitive); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
813 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
814 |
foreach ( $words as $word ) |
1 | 815 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
816 |
// Boldface searched words |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
817 |
$ptlen = strlen($pt); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
818 |
for ( $i = 0; $i < $ptlen; $i++ ) |
1 | 819 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
820 |
$len = strlen($word); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
821 |
if ( strtolower(substr($pt, $i, $len)) == strtolower($word) ) |
1 | 822 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
823 |
$chunk1 = substr($pt, 0, $i); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
824 |
$chunk2 = substr($pt, $i, $len); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
825 |
$chunk3 = substr($pt, ( $i + $len )); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
826 |
$pt = $chunk1 . $chunk2 . $chunk3; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
827 |
$ptlen = strlen($pt); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
828 |
// Cut off text to 150 chars or so |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
829 |
if ( !$cut_off ) |
1 | 830 |
{ |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
831 |
$cut_off = true; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
832 |
if ( $i - 75 > 0 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
833 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
834 |
// Navigate backwards until a space character is found |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
835 |
$chunk = substr($pt, 0, ( $i - 75 )); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
836 |
$final_chunk = $chunk; |
320
112debff64bd
SURPRISE! Preliminary PostgreSQL support added. The required schema file is not present in this commit and will be included at a later date. No installer support is implemented. Also in this commit: several fixes including <!-- SYSMSG ... --> was broken in template compiler; set fixed width on included images to prevent the thumbnail box from getting huge; added a much more friendly interface to AJAX responses that are invalid JSON
Dan
parents:
292
diff
changeset
|
837 |
for ( $j = strlen($chunk) - 1; $j > 0; $j = $j - 1 ) |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
838 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
839 |
if ( in_array($chunk{$j}, $space_chars) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
840 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
841 |
$final_chunk = substr($chunk, $j + 1); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
842 |
break; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
843 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
844 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
845 |
$mid_chunk = substr($pt, ( $i - 75 ), 75); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
846 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
847 |
$clipped = '...' . $final_chunk . $mid_chunk . $chunk2; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
848 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
849 |
$chunk = substr($pt, ( $i + strlen($chunk2) + 75 )); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
850 |
$final_chunk = $chunk; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
851 |
for ( $j = 0; $j < strlen($chunk); $j++ ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
852 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
853 |
if ( in_array($chunk{$j}, $space_chars) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
854 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
855 |
$final_chunk = substr($chunk, 0, $j); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
856 |
break; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
857 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
858 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
859 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
860 |
$end_chunk = substr($pt, ( $i + strlen($chunk2) ), 75 ); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
861 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
862 |
$clipped .= $end_chunk . $final_chunk . '...'; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
863 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
864 |
$pt = $clipped; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
865 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
866 |
else if ( strlen($pt) > 200 ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
867 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
868 |
$mid_chunk = substr($pt, ( $i - 75 ), 75); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
869 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
870 |
$clipped = $chunk1 . $chunk2; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
871 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
872 |
$chunk = substr($pt, ( $i + strlen($chunk2) + 75 )); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
873 |
$final_chunk = $chunk; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
874 |
for ( $j = 0; $j < strlen($chunk); $j++ ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
875 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
876 |
if ( in_array($chunk{$j}, $space_chars) ) |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
877 |
{ |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
878 |
$final_chunk = substr($chunk, 0, $j); |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
879 |
break; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
880 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
881 |
} |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
882 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
883 |
$end_chunk = substr($pt, ( $i + strlen($chunk2) ), 75 ); |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
884 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
885 |
$clipped .= $end_chunk . $final_chunk . '...'; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
886 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
887 |
$pt = $clipped; |
292
b3cfaf0a505c
Fixed highlighting in search results; changed search algorithm to give more score for terms found in page title; hopefully (hackishly) fixed login_key_cache getting too long
Dan
parents:
272
diff
changeset
|
888 |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
889 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
890 |
break 2; |
1 | 891 |
} |
892 |
} |
|
893 |
} |
|
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
894 |
$cut_off = false; |
1 | 895 |
} |
272
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
896 |
return $pt; |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
897 |
} |
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
898 |
|
e0ec986c0af3
Searching sucks, and Enano's search algorithm was complete bullcrap. So I rewrote it. No, it does not use Google search technology. Like they have a patent for using the Arial font on search result pages anyway.
Dan
parents:
166
diff
changeset
|
899 |
/**
 * Returns a list of words that shouldn't under most circumstances be indexed for searching.
 *
 * The list is built on the first call and cached in a static variable, so later calls
 * in the same request return the same array without rebuilding it. Every word appears
 * exactly once in the returned list.
 * @return array
 */

function get_stopwords()
{
  static $stopwords;
  
  // Already built during this request? Hand back the cached copy.
  if ( is_array($stopwords) )
    return $stopwords;
  
  // NOTE(review): 'I' is the only capitalised entry. If callers lowercase terms before
  // comparing against this list it will never match - confirm whether that is intended.
  $stopwords = array('I', 'a', 'about', 'an', 'are', 'as', 'at', 'be', 'by', 'com', 'de', 'en', 'for', 'from', 'how', 'in', 'is', 'it',
                     'la', 'of', 'on', 'or', 'that', 'the', 'this', 'to', 'was', 'what', 'when', 'where', 'who', 'will', 'with', 'and');
  
  return $stopwords;
}
916 |
||
756
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
917 |
/** |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
918 |
* Private function to inject custom results into a search. |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
919 |
*/ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
920 |
|
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
921 |
function inject_custom_search_results(&$query, &$query_phrase, &$scores, &$page_data, &$case_sensitive, &$word_list) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
922 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
923 |
global $db, $session, $paths, $template, $plugins; // Common objects |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
924 |
global $lang; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
925 |
|
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
926 |
global $search_handlers; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
927 |
|
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
928 |
// global functions |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
929 |
$terms = array( |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
930 |
'any' => array_merge($query['any'], $query_phrase['any']), |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
931 |
'req' => array_merge($query['req'], $query_phrase['req']), |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
932 |
'not' => $query['not'] |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
933 |
); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
934 |
|
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
935 |
foreach ( $search_handlers as &$options ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
936 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
937 |
$where = array('any' => array(), 'req' => array(), 'not' => array()); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
938 |
$where_any =& $where['any']; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
939 |
$where_req =& $where['req']; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
940 |
$where_not =& $where['not']; |
757 | 941 |
$title_col = ( $case_sensitive ) ? $options['titlecolumn'] : ENANO_SQLFUNC_LOWERCASE . '(' . $options['titlecolumn'] . ')'; |
756
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
942 |
if ( isset($options['datacolumn']) ) |
757 | 943 |
$desc_col = ( $case_sensitive ) ? $options['datacolumn'] : ENANO_SQLFUNC_LOWERCASE . '(' . $options['datacolumn'] . ')'; |
756
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
944 |
else |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
945 |
$desc_col = "''"; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
946 |
foreach ( $terms['any'] as $term ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
947 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
948 |
$term = escape_string_like($term); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
949 |
if ( !$case_sensitive ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
950 |
$term = strtolower($term); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
951 |
$where_any[] = "( $title_col LIKE '%{$term}%' OR $desc_col LIKE '%{$term}%' )"; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
952 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
953 |
foreach ( $terms['req'] as $term ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
954 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
955 |
$term = escape_string_like($term); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
956 |
if ( !$case_sensitive ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
957 |
$term = strtolower($term); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
958 |
$where_req[] = "( $title_col LIKE '%{$term}%' OR $desc_col LIKE '%{$term}%' )"; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
959 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
960 |
foreach ( $terms['not'] as $term ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
961 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
962 |
$term = escape_string_like($term); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
963 |
if ( !$case_sensitive ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
964 |
$term = strtolower($term); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
965 |
$where_not[] = "$title_col NOT LIKE '%{$term}%' AND $desc_col NOT LIKE '%{$term}%'"; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
966 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
967 |
if ( empty($where_any) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
968 |
unset($where_any, $where['any']); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
969 |
if ( empty($where_req) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
970 |
unset($where_req, $where['req']); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
971 |
if ( empty($where_not) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
972 |
unset($where_not, $where['not']); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
973 |
|
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
974 |
$where_any = '(' . implode(' OR ', $where_any) . '' . ( isset($where['req']) || isset($where['not']) ? ' OR 1 = 1' : '' ) . ')'; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
975 |
|
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
976 |
if ( isset($where_req) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
977 |
$where_req = implode(' AND ', $where_req); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
978 |
if ( isset($where_not) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
979 |
$where_not = implode( 'AND ', $where_not); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
980 |
|
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
981 |
$where = implode(' AND ', $where); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
982 |
|
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
983 |
$columns = $options['titlecolumn']; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
984 |
if ( isset($options['datacolumn']) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
985 |
$columns .= ", {$options['datacolumn']}"; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
986 |
if ( isset($options['additionalcolumns']) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
987 |
$columns .= ', ' . implode(', ', $options['additionalcolumns']); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
988 |
|
758
6b79a49f85f0
Added additionalwhere option (it was already documented) to custom search API
Dan
parents:
757
diff
changeset
|
989 |
$additionalwhere = ( isset($options['additionalwhere']) ) ? $options['additionalwhere'] : ''; |
6b79a49f85f0
Added additionalwhere option (it was already documented) to custom search API
Dan
parents:
757
diff
changeset
|
990 |
|
756
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
991 |
$sql = "SELECT $columns FROM " . table_prefix . "{$options['table']} WHERE ( $where ) $additionalwhere;"; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
992 |
|
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
993 |
if ( !($q = $db->sql_unbuffered_query($sql)) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
994 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
995 |
$db->_die('Automatically generated search query'); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
996 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
997 |
|
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
998 |
if ( $row = $db->fetchrow() ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
999 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1000 |
do |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1001 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1002 |
$parser = $template->makeParserText($options['uniqueid']); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1003 |
$parser->assign_vars($row); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1004 |
$idstring = $parser->run(); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1005 |
|
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1006 |
// Score this result |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1007 |
foreach ( $word_list as $term ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1008 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1009 |
if ( $case_sensitive ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1010 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1011 |
if ( strstr($row[$options['titlecolumn']], $term) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1012 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1013 |
( isset($scores[$idstring]) ) ? $scores[$idstring] += 1.5 : $scores[$idstring] = 1.5; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1014 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1015 |
else if ( isset($options['datacolumn']) && strstr($row[$options['datacolumn']], $term) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1016 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1017 |
( isset($scores[$idstring]) ) ? $scores[$idstring]++ : $scores[$idstring] = 1; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1018 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1019 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1020 |
else |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1021 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1022 |
if ( stristr($row[$options['titlecolumn']], $term) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1023 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1024 |
( isset($scores[$idstring]) ) ? $scores[$idstring] += 1.5 : $scores[$idstring] = 1.5; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1025 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1026 |
else if ( isset($options['datacolumn']) && stristr($row[$options['datacolumn']], $term) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1027 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1028 |
( isset($scores[$idstring]) ) ? $scores[$idstring]++ : $scores[$idstring] = 1; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1029 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1030 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1031 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1032 |
// Generate text... |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1033 |
$text = ''; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1034 |
if ( isset($options['datacolumn']) && !isset($options['formatcallback']) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1035 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1036 |
$text = highlight_and_clip_search_result(htmlspecialchars($row[$options['datacolumn']]), $word_list); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1037 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1038 |
else if ( isset($options['formatcallback']) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1039 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1040 |
if ( is_callable($options['formatcallback']) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1041 |
{ |
945
c19242d13a49
Removed "@" from all call_user_func() calls to make debugging special pages and such possible
Dan
parents:
832
diff
changeset
|
1042 |
$text = call_user_func($options['formatcallback'], $row, $word_list); |
756
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1043 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1044 |
else |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1045 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1046 |
$parser = $template->makeParserText($options['formatcallback']); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1047 |
$parser->assign_vars($row); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1048 |
$text = $parser->run(); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1049 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1050 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1051 |
|
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1052 |
// Inject result |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1053 |
|
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1054 |
if ( isset($scores[$idstring]) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1055 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1056 |
$parser = $template->makeParserText($options['linkformat']['page_id']); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1057 |
$parser->assign_vars($row); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1058 |
$page_id = $parser->run(); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1059 |
|
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1060 |
$parser = $template->makeParserText($options['linkformat']['namespace']); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1061 |
$parser->assign_vars($row); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1062 |
$namespace = $parser->run(); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1063 |
|
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1064 |
$page_data[$idstring] = array( |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1065 |
'page_name' => highlight_search_result(htmlspecialchars($row[$options['titlecolumn']]), $word_list), |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1066 |
'page_text' => $text, |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1067 |
'score' => $scores[$idstring], |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1068 |
'page_id' => $page_id, |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1069 |
'namespace' => $namespace, |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1070 |
); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1071 |
|
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1072 |
// Any additional flags that need to be added to the result? |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1073 |
// The small usually-bracketed text to the left of the title |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1074 |
if ( isset($options['resultnote']) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1075 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1076 |
$page_data[$idstring]['page_note'] = $options['resultnote']; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1077 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1078 |
// Should we include the length? |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1079 |
if ( isset($options['datacolumn']) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1080 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1081 |
$page_data[$idstring]['page_length'] = strlen($row[$options['datacolumn']]); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1082 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1083 |
else |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1084 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1085 |
$page_data[$idstring]['page_length'] = 0; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1086 |
$page_data[$idstring]['zero_length'] = true; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1087 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1088 |
// Anything to append to result links? |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1089 |
if ( isset($options['linkformat']['append']) ) |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1090 |
{ |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1091 |
$page_data[$idstring]['url_append'] = $options['linkformat']['append']; |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1092 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1093 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1094 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1095 |
while ( $row = $db->fetchrow($q) ); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1096 |
$db->free_result($q); |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1097 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1098 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1099 |
} |
e8cf18383425
Added a new search API that allows much easier registration of search results. Basically you give the engine a table, a few columns to look at, and tell it how to format the results and you're done.
Dan
parents:
685
diff
changeset
|
1100 |
|
1 | 1101 |
?> |