Index: contrib/miscellaneous/src/test/org/apache/lucene/index/TestFieldNormModifier.java
===================================================================
--- contrib/miscellaneous/src/test/org/apache/lucene/index/TestFieldNormModifier.java (revision 495338)
+++ contrib/miscellaneous/src/test/org/apache/lucene/index/TestFieldNormModifier.java (working copy)
@@ -18,23 +18,19 @@
  */
 
 import java.io.IOException;
-import java.util.Arrays;
 
 import junit.framework.TestCase;
 
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.HitCollector;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.HitCollector;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.analysis.SimpleAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
 
 /**
  * Tests changing of field norms with a custom similarity and with fake norms.
@@ -42,6 +38,10 @@
  * @version $Id$
  */
 public class TestFieldNormModifier extends TestCase {
+  private static final int N_FIELDS_WITH_NUM = 10;
+
+  private static final String FIELD_WITH_NUM = "fieldNum";
+
   public TestFieldNormModifier(String name) {
     super(name);
   }
@@ -58,6 +58,14 @@
       return (float)numTokens;
     }
   };
+  /** adds e.g. 7 for field called fieldNum7 */
+  public static Similarity simWithDelta = new DefaultSimilarity() {
+    public float lengthNorm(String fieldName, int numTokens) {
+      int k = fieldName.indexOf(FIELD_WITH_NUM);
+      int delta = (k<0 ? 0 : Integer.parseInt(fieldName.substring(FIELD_WITH_NUM.length())));
+      return (float)(delta+numTokens);
+    }
+  };
 
   public void setUp() throws Exception {
     IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);
@@ -66,11 +74,17 @@
       Document d = new Document();
       d.add(new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED));
       d.add(new Field("nonorm", "word", Field.Store.YES, Field.Index.NO_NORMS));
-      d.add(new Field("untokfield", "20061212 20071212", Field.Store.YES, Field.Index.TOKENIZED));
+      d.add(new Field("untokfield", "20061212 20071212", Field.Store.YES, Field.Index.UN_TOKENIZED));
+      for (int k = 0; k < N_FIELDS_WITH_NUM; k++) {
+        d.add(new Field(FIELD_WITH_NUM+k, "word", Field.Store.YES, Field.Index.UN_TOKENIZED));
+      }
 
       for (int j = 1; j <= i; j++) {
         d.add(new Field("field", "crap", Field.Store.YES, Field.Index.TOKENIZED));
         d.add(new Field("nonorm", "more words", Field.Store.YES, Field.Index.NO_NORMS));
+        for (int k = 0; k < N_FIELDS_WITH_NUM; k++) {
+          d.add(new Field(FIELD_WITH_NUM+k, "word", Field.Store.YES, Field.Index.UN_TOKENIZED));
+        }
       }
       writer.addDocument(d);
     }
@@ -162,21 +176,54 @@
     }
   }
 
-  public void testNormKiller() throws IOException {
+  public void testTokenizedFieldNormKiller() throws IOException {
+    IndexReader r = IndexReader.open(store);
+    assertTrue(r.hasNorms("field"));
+    r.close();
+
+    FieldNormModifier fnm = new FieldNormModifier(store, null);
+    fnm.killNorms("field");
+    r = IndexReader.open(store);
+    assertFalse(r.hasNorms("field"));
+    r.close();
+
+    // verify that we still get documents in the same order as originally
+    IndexSearcher searcher = new IndexSearcher(store);
+    final float[] scores = new float[NUM_DOCS];
+    float lastScore = 0.0f;
+
+    // default similarity should return the same score for all documents for this query
+    searcher.search(new TermQuery(new Term("field", "word")), new HitCollector() {
+      public final void collect(int doc, float score) {
+        scores[doc] = score;
+      }
+    });
+    searcher.close();
+
+    lastScore = scores[0];
+    for (int i = 0; i < NUM_DOCS; i++) {
+      String msg = "i=" + i + ", " + scores[i] + " == " + lastScore;
+      assertTrue(msg, scores[i] == lastScore);
+      //System.out.println(msg);
+      lastScore = scores[i];
+    }
+  }
+
+  public void testUnTokenizedFieldNormKiller() throws IOException {
     IndexReader r = IndexReader.open(store);
-    byte[] oldNorms = r.norms("untokfield");
+//    byte[] oldNorms = r.norms("untokfield");
+    assertTrue(r.hasNorms("untokfield"));
     r.close();
 
-    FieldNormModifier fnm = new FieldNormModifier(store, s);
-    fnm.reSetNorms("untokfield");
+    FieldNormModifier fnm = new FieldNormModifier(store, null);
+    fnm.killNorms("untokfield");
 
     r = IndexReader.open(store);
-    byte[] newNorms = r.norms("untokfield");
+//    byte[] newNorms = r.norms("untokfield");
+    assertFalse(r.hasNorms("untokfield"));
     r.close();
 
-    assertFalse(Arrays.equals(oldNorms, newNorms));
-
     // verify that we still get documents in the same order as originally
     IndexSearcher searcher = new IndexSearcher(store);
    final float[] scores = new float[NUM_DOCS];
@@ -198,4 +245,57 @@
       lastScore = scores[i];
     }
   }
+
+  public void testModifiedNormValuesCombinedWithKill() throws Exception {
+    //verify initial norms
+    Similarity ds = new DefaultSimilarity();
+    IndexReader reader = IndexReader.open(store);
+    for (int i=0; i

Index: contrib/miscellaneous/src/java/org/apache/lucene/misc/FieldNormModifier.java
===================================================================
-   * Opens a new IndexReader on the Directory given to this instance,
-   * modifies the norms (either using the Similarity given to this instance, or by using fake norms,
-   * and closes the IndexReader.
+   * Opens a new IndexReader on the Directory given to this instance and
+   * modifies the norms using the Similarity specified in the call to the constructor.
    *
    *
    * @param field the field whose norms should be reset
@@ -104,7 +109,6 @@
   public void reSetNorms(String field) throws IOException {
     String fieldName = field.intern();
     int[] termCounts = new int[0];
-    byte[] fakeNorms = new byte[0];
 
     IndexReader reader = null;
     TermEnum termEnum = null;
@@ -112,9 +116,6 @@
     try {
       reader = IndexReader.open(dir);
       termCounts = new int[reader.maxDoc()];
-      // if we are killing norms, get fake ones
-      if (sim == null)
-        fakeNorms = SegmentReader.createFakeNorms(reader.maxDoc());
       try {
         termEnum = reader.terms(new Term(field,""));
         try {
@@ -135,24 +136,121 @@
       } finally {
         if (null != termEnum) termEnum.close();
       }
+
+      for (int d = 0; d < termCounts.length; d++) {
+        if (! reader.isDeleted(d))
+          reader.setNorm(d, fieldName, sim.encodeNorm(sim.lengthNorm(fieldName, termCounts[d])));
+      }
     } finally {
       if (null != reader) reader.close();
     }
+  }
+
+  /**
+   * Removes norms for the given field. The index is optimized and expanded into
+   * a multi-file index first. After the norms are removed, the index is packed
+   * back into a compound index, if that was its original format.
+   * @param fieldName the field whose norms should be removed
+   * @throws IOException
+   */
+  public void killNorms(String fieldName) throws IOException {
+    // figure out if the index is a CFS index or not
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    SegmentInfo si = sis.info(0);
+    boolean isCompound = SegmentReader.usesCompoundFile(si);
+
+    // ensure there is only one segment, and that the index is expanded if it's CFS
+    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), false);
+    writer.setUseCompoundFile(false);
+    writer.forceOptimize();
+    writer.close();
+
+    // find the .fnm file and .nrm files
+    String fnm = null;
+    String nrm = null;
+    String[] files = dir.list();
+    for (int i = 0; i < files.length; i++) {
+      if (files[i].endsWith(".fnm")) {
+        fnm = files[i];
+        System.out.println("FieldInfo file: " + fnm);
+      } else if (files[i].endsWith(".nrm")) {
+        nrm = files[i];
+        System.out.println("Norms file: " + nrm);
+      }
+      if (fnm!=null && nrm!=null)
+        break; // only 1 .fnm and 1 .nrm per optimized index
+    }
+
+    // switch from the .nrm file to .fN files for being able to kill norms
+    // 1. save all norms in .fN files
+    sis = new SegmentInfos();
+    sis.read(dir);
+    si = sis.info(0);
+    IndexReader reader = IndexReader.open(dir);
+    FieldInfos fis = new FieldInfos(dir,fnm);
+    for (int i=0; i
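
For reference, a minimal usage sketch (not part of the patch) of the new killNorms API, written the way the new tests above exercise it: a FieldNormModifier is constructed over a Directory with a null Similarity, killNorms is called for one field, and IndexReader.hasNorms for that field is then expected to return false. The class name and the single-document index below are illustrative only.

import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.misc.FieldNormModifier;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class KillNormsExample {
  public static void main(String[] args) throws Exception {
    // build a tiny index with one normed field, mirroring setUp() in the test above
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
    Document d = new Document();
    d.add(new Field("field", "some words", Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(d);
    writer.close();

    // a null Similarity is what the new tests pass when they only want to kill norms
    FieldNormModifier fnm = new FieldNormModifier(dir, null);
    fnm.killNorms("field");

    // after killNorms the field should report no norms, as asserted in the tests
    IndexReader reader = IndexReader.open(dir);
    System.out.println("field has norms: " + reader.hasNorms("field")); // expected: false
    reader.close();
  }
}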