Mailing List Archive

r3805 - trunk/c_src/KinoSearch/Index
Author: creamyg
Date: 2008-08-30 11:57:53 -0700 (Sat, 30 Aug 2008)
New Revision: 3805

Modified:
trunk/c_src/KinoSearch/Index/PostingList.bp
trunk/c_src/KinoSearch/Index/SegLexicon.c
trunk/c_src/KinoSearch/Index/SegPostingList.bp
trunk/c_src/KinoSearch/Index/SegPostingList.c
Log:
Move Seek_Lex() from PostingList into SegPostingList.


Modified: trunk/c_src/KinoSearch/Index/PostingList.bp
===================================================================
--- trunk/c_src/KinoSearch/Index/PostingList.bp 2008-08-30 18:56:47 UTC (rev 3804)
+++ trunk/c_src/KinoSearch/Index/PostingList.bp 2008-08-30 18:57:53 UTC (rev 3805)
@@ -39,25 +39,27 @@
public abstract void
Seek(PostingList *self, Obj *target = NULL);

- /** Occasionally optimized version of PList_Seek, designed to speed
- * sequential access.
- */
abstract void
Seek_Lex(PostingList *self, Lexicon *lexicon);

- /** Advance the PostingList object to the next document. Return a positive
- * document number, or 0 once the iterator is exhausted.
+ /** Advance the iterator to the next document.
+ *
+ * @return A positive document number, or 0 once the iterator is exhausted.
*/
public abstract i32_t
Next(PostingList *self);

- /** Skip to the first doc number greater than or equal to [target]. Return
- * the doc number, or 0 once the iterator is exhausted.
+ /** Advance the iterator to the first doc number greater than or equal to
+ * <code>target</code>.
+ *
+ * @param target A positive document number, which must be greater than
+ * the current document number once the iterator has been initialized.
+ * @return A positive document number, or 0 once the iterator is exhausted.
*/
i32_t
Skip_To(PostingList *self, i32_t target);

- /** Invoke Post_Make_Scorer for this PostingList's posting.
+ /** Invoke Post_Make_Scorer() for this PostingList's posting.
*/
abstract Scorer*
Make_Scorer(PostingList *self, Similarity *similarity,

Modified: trunk/c_src/KinoSearch/Index/SegLexicon.c
===================================================================
--- trunk/c_src/KinoSearch/Index/SegLexicon.c 2008-08-30 18:56:47 UTC (rev 3804)
+++ trunk/c_src/KinoSearch/Index/SegLexicon.c 2008-08-30 18:57:53 UTC (rev 3805)
@@ -9,6 +9,7 @@
#include "KinoSearch/Index/TermInfo.h"
#include "KinoSearch/Index/LexStepper.h"
#include "KinoSearch/Index/SegLexCache.h"
+#include "KinoSearch/Index/SegPostingList.h"
#include "KinoSearch/Store/Folder.h"
#include "KinoSearch/Store/InStream.h"
#include "KinoSearch/Util/I32Array.h"
@@ -221,19 +222,24 @@
i32_t *ints = MALLOCATE(max_docs + 1, i32_t);
i32_t term_num = 0;
i32_t i;
+ SegPostingList *seg_plist = (SegPostingList*)plist;
+
+ if ( !OBJ_IS_A(seg_plist, SEGPOSTINGLIST)) {
+ CONFESS("Not a SegPostingList: %o", plist);
+ }

for (i = 0; i <= max_docs; i++) {
ints[i] = -1;
}

SegLex_Reset(self);
-
while (SegLex_Next(self)) {
- PList_Seek_Lex(plist, (Lexicon*)self);
+ i32_t doc_num;
+ SegPList_Seek_Lex(seg_plist, (Lexicon*)self);

/* Assign the same sort position to all docs with this term. */
- while (PList_Next(plist)) {
- ints[ PList_Get_Doc_Num(plist) ] = term_num;
+ while (0 != (doc_num = SegPList_Next(seg_plist))) {
+ ints[doc_num] = term_num;
}
term_num++;
}

Modified: trunk/c_src/KinoSearch/Index/SegPostingList.bp
===================================================================
--- trunk/c_src/KinoSearch/Index/SegPostingList.bp 2008-08-30 18:56:47 UTC (rev 3804)
+++ trunk/c_src/KinoSearch/Index/SegPostingList.bp 2008-08-30 18:57:53 UTC (rev 3805)
@@ -59,6 +59,8 @@
public void
Seek(SegPostingList *self, Obj *target);

+ /** Optimized version of Seek(), designed to speed sequential access.
+ */
void
Seek_Lex(SegPostingList *self, Lexicon *lexicon);


Modified: trunk/c_src/KinoSearch/Index/SegPostingList.c
===================================================================
--- trunk/c_src/KinoSearch/Index/SegPostingList.c 2008-08-30 18:56:47 UTC (rev 3804)
+++ trunk/c_src/KinoSearch/Index/SegPostingList.c 2008-08-30 18:57:53 UTC (rev 3805)
@@ -240,7 +240,8 @@
) {
seek_tinfo(self, SegLex_Get_Term_Info(seg_lexicon));
}
- /* Punt case. */
+ /* Punt case: fall back to Seek() on the Lexicon's current term, which
+ * is more expensive because Seek() calls LexReader_Fetch_Term_Info(). */
else {
Obj *term = Lex_Get_Term(lexicon);
SegPList_Seek(self, term);


_______________________________________________
kinosearch-commits mailing list
kinosearch-commits@rectangular.com
http://www.rectangular.com/mailman/listinfo/kinosearch-commits