[Zope-CVS] CVS: Products/ZCTextIndex - Index.py:1.1.2.16

Tim Peters tim.one@comcast.net
Fri, 3 May 2002 01:32:02 -0400


Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv24588

Modified Files:
      Tag: TextIndexDS9-branch
	Index.py 
Log Message:
_get_frequencies():  return a triple instead of a pair, to increase
efficiency (zipping the wids and freqs together by hand just so the
tuples can get picked apart again on the other end is a waste of
precious tuples <wink>).


=== Products/ZCTextIndex/Index.py 1.1.2.15 => 1.1.2.16 ===
     def index_doc(self, docid, text, threshold=None):
         wids = self._lexicon.sourceToWordIds(text)
-        freqs, docweight = self._get_frequencies(wids)
-        uniqwids = []
-        for wid, f in freqs:
-            self._add_wordinfo(wid, f, docid)
-            uniqwids.append(wid)
+        uniqwids, freqs, docweight = self._get_frequencies(wids)
+        for i in range(len(uniqwids)):
+            self._add_wordinfo(uniqwids[i], freqs[i], docid)
         self._docweight[docid] = docweight
         self._docwords[docid] = IISet(uniqwids)
 
@@ -114,17 +112,20 @@
     def _get_frequencies(self, wids):
         """Return individual doc-term weights and docweight."""
         # computes w(d, t) for each term, and W(d)
-        # returns pairt [(wid0, w(d, wid0)), (wid1, w(d, wid1)), ...], W(d)
+        # return triple
+        #    [wid0, wid1, ...],
+        #    [w(d, wid0), w(d, wid1), ...],
+        #    W(d)
         d = {}
         for wid in wids:
             d[wid] = d.get(wid, 0) + 1
         Wsquares = 0.
         freqs = []
-        for wid, count in d.items():
+        for count in d.values():
             f = doc_term_weight(count)
             Wsquares += f * f
-            freqs.append((wid, scaled_int(f)))
-        return freqs, scaled_int(math.sqrt(Wsquares))
+            freqs.append(scaled_int(f))
+        return d.keys(), freqs, scaled_int(math.sqrt(Wsquares))
 
     def _add_wordinfo(self, wid, f, docid):
         try: