[Checkins] SVN: zope.index/trunk/ - Fixed a broken optimization in okascore.c: it was passing a Python float to the PyInt_AS_LONG() macro

Shane Hathaway shane at hathawaymix.org
Sat Jul 25 06:26:02 EDT 2009


Log message for revision 102293:
  - Fixed a broken optimization in okascore.c: it was passing a Python
    float to the PyInt_AS_LONG() macro. This resulted in wrong scores,
    especially on 64 bit platforms, where all scores typically ended up
    being zero.
  
  - Changed okascore.c to produce the same results as its Python
    equivalent, reducing the brittleness of the text index tests.
  

Changed:
  U   zope.index/trunk/CHANGES.txt
  U   zope.index/trunk/src/zope/index/text/okascore.c
  U   zope.index/trunk/src/zope/index/text/textindex.txt

-=-
Modified: zope.index/trunk/CHANGES.txt
===================================================================
--- zope.index/trunk/CHANGES.txt	2009-07-25 07:48:34 UTC (rev 102292)
+++ zope.index/trunk/CHANGES.txt	2009-07-25 10:26:01 UTC (rev 102293)
@@ -4,8 +4,14 @@
 3.5.3 (unreleased)
 ------------------
 
-- ...
+- Fixed a broken optimization in okascore.c: it was passing a Python
+  float to the PyInt_AS_LONG() macro. This resulted in wrong scores,
+  especially on 64 bit platforms, where all scores typically ended up
+  being zero.
 
+- Changed okascore.c to produce the same results as its Python
+  equivalent, reducing the brittleness of the text index tests.
+
 3.5.2 (2009-06-09)
 ------------------
 

Modified: zope.index/trunk/src/zope/index/text/okascore.c
===================================================================
--- zope.index/trunk/src/zope/index/text/okascore.c	2009-07-25 07:48:34 UTC (rev 102292)
+++ zope.index/trunk/src/zope/index/text/okascore.c	2009-07-25 10:26:01 UTC (rev 102293)
@@ -63,7 +63,6 @@
 						   &idf, &meandoclen))
 		return NULL;
 
-	idf *= 1024.0;	/* float out part of the scaled_int computation */
 	n = PyObject_Length(d2fitems);
 	for (i = 0; i < n; ++i) {
 		PyObject *d_and_f;	/* d2f[i], a (d, f) pair */
@@ -72,7 +71,7 @@
 		PyObject *doclen;	/* ._docweight[d] */
 		double lenweight;
 		double tf;
-		PyObject *scaled_int;
+		PyObject *doc_score;
 		int status;
 
 		d_and_f = PySequence_GetItem(d2fitems, i);
@@ -93,17 +92,17 @@
 			Py_DECREF(d_and_f);
 			return NULL;
 		}
-		lenweight = B_FROM1 + B * PyInt_AS_LONG(doclen) / meandoclen;
+		lenweight = B_FROM1 + B * PyInt_AsLong(doclen) / meandoclen;
 
 		tf = f * K1_PLUS1 / (f + K1 * lenweight);
-		scaled_int = PyInt_FromLong((long)(tf * idf + 0.5));
-		if (scaled_int == NULL)
+		doc_score = PyFloat_FromDouble(tf * idf);
+		if (doc_score == NULL)
 			status = -1;
 		else
-			status = PyObject_SetItem(result, d, scaled_int);
+			status = PyObject_SetItem(result, d, doc_score);
 		Py_DECREF(d_and_f);
 		Py_DECREF(doclen);
-		Py_XDECREF(scaled_int);
+		Py_XDECREF(doc_score);
 		if (status < 0)
 			return NULL;
 	}

Modified: zope.index/trunk/src/zope/index/text/textindex.txt
===================================================================
--- zope.index/trunk/src/zope/index/text/textindex.txt	2009-07-25 07:48:34 UTC (rev 102292)
+++ zope.index/trunk/src/zope/index/text/textindex.txt	2009-07-25 10:26:01 UTC (rev 102293)
@@ -6,7 +6,6 @@
 any arguments:
 
     >>> from zope.index.text.textindex import TextIndex
-    >>> from zope.index.text.okapiindex import score
     >>> index = TextIndex()
 
 By default, it uses an "Okapi" inverted index and a lexicon with a
@@ -66,18 +65,13 @@
     ... """)
 
 Then we can search using the apply method, which takes a search
-string.  If we use the okascore.c module, we get different results
-because it uses different constants:
+string.
 
-    >>> if score is not None: result = [(1, '787.6832'), (2, '839.0846')]
-    >>> if score is None: result = [(1, '0.6153'), (2, '0.6734')]
-    >>> [(k, "%.4f" % v) for (k, v) in index.apply(u'brown fox').items()] == result
-    True
+    >>> [(k, "%.4f" % v) for (k, v) in index.apply(u'brown fox').items()]
+    [(1, '0.6153'), (2, '0.6734')]
 
-    >>> if score is not None: result = [(1, '787.7337')]
-    >>> if score is None: result = [(1, '0.6153')]
-    >>> [(k, "%.4f" % v) for (k, v) in index.apply(u'quick fox').items()] == result
-    True
+    >>> [(k, "%.4f" % v) for (k, v) in index.apply(u'quick fox').items()]
+    [(1, '0.6153')]
 
     >>> [(k, "%.4f" % v) for (k, v) in index.apply(u'brown python').items()]
     []
@@ -85,41 +79,27 @@
     >>> [(k, "%.4f" % v) for (k, v) in index.apply(u'dalmatian').items()]
     []
 
-    >>> if score is not None: result = [(1, '333.0285'), (2, '333.0285'), (8, '454.7052')]
-    >>> if score is None: result = [(1, '0.2602'), (2, '0.2529'), (8, '0.0934')]
-    >>> [(k, "%.4f" % v) for (k, v) in index.apply(u'brown or python').items()] == result
-    True
+    >>> [(k, "%.4f" % v) for (k, v) in index.apply(u'brown or python').items()]
+    [(1, '0.2602'), (2, '0.2529'), (8, '0.0934')]
 
-    >>> from zope.index.text.okapiindex import score
-    >>> if score is not None: result = [(7, '787.7707')]
-    >>> if score is None: result =  [(7, '0.6948')]
-    >>> [(k, "%.4f" % v) for (k, v) in index.apply(u'butts').items()] == result
-    True
+    >>> [(k, "%.4f" % v) for (k, v) in index.apply(u'butts').items()]
+    [(7, '0.6948')]
 
-The outputs are mappings from document ids to integer scored. Items
+The outputs are mappings from document ids to float scores. Items
 with higher scores are more relevent.
 
-We can use unicode characters in search strings.  If we use the
-okascore.c module, we get different results because it uses different
-constants:
+We can use unicode characters in search strings.
 
-    >>> if score is not None: result = [(4, '787.7707')]
-    >>> if score is None: result = [(4, '0.7427')]
-    >>> [(k, "%.4f" % v) for (k, v) in index.apply(u"Fran\xe7ois").items()] == result
-    True
+    >>> [(k, "%.4f" % v) for (k, v) in index.apply(u"Fran\xe7ois").items()]
+    [(4, '0.7427')]
 
-    >>> if score is not None: result = [(5, '787.7707')]
-    >>> if score is None: result = [(5, '0.7179')]
-    >>> [(k, "%.4f" % v) for (k, v) in index.apply(word).items()] == result
-    True
+    >>> [(k, "%.4f" % v) for (k, v) in index.apply(word).items()]
+    [(5, '0.7179')]
 
-We can use globbing in search strings.  If we use the okascore.c
-module, we get different results because it uses different constants:
+We can use globbing in search strings.
 
-    >>> if score is not None: result = [(1, '2789.000'), (2, '3153.000'), (3, '3808.000')]
-    >>> if score is None: result = [(1, '2.179'), (2, '2.651'), (3, '2.041')]
-    >>> [(k, "%.3f" % v) for (k, v) in index.apply('fo*').items()] == result
-    True
+    >>> [(k, "%.3f" % v) for (k, v) in index.apply('fo*').items()]
+    [(1, '2.179'), (2, '2.651'), (3, '2.041')]
 
 Text indexes support basic statistics:
 
@@ -134,16 +114,10 @@
     >>> index2 = TextIndex()
     >>> index2.index_doc(1, [])
     >>> index2.index_doc(1, ["Zorro"])
+    >>> [(k, "%.4f" % v) for (k, v) in index2.apply("Zorro").items()]
+    [(1, '0.4545')]
 
-If we use the okascore.c module, we get different results because it
-uses different constants:
 
-    >>> if score is not None: result = [(1, '787.5803')]
-    >>> if score is None: result = [(1, '0.4545')]
-    >>> [(k, "%.4f" % v) for (k, v) in index2.apply("Zorro").items()] == result
-    True
-
-
 Tracking Changes
 ================
 



More information about the Checkins mailing list