Index: src/java/org/apache/lucene/index/FieldInfos.java
===================================================================
--- src/java/org/apache/lucene/index/FieldInfos.java (revision 495178)
+++ src/java/org/apache/lucene/index/FieldInfos.java (working copy)
@@ -32,7 +32,7 @@
  * be adding documents at a time, with no other reader or writer threads
  * accessing this object.
  */
-final class FieldInfos {
+class FieldInfos {
 
   static final byte IS_INDEXED = 0x1;
   static final byte STORE_TERMVECTOR = 0x2;
Index: src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexWriter.java (revision 495178)
+++ src/java/org/apache/lucene/index/IndexWriter.java (working copy)
@@ -781,6 +781,16 @@
     }
   }
 
+  /**
+   * Forces index optimization. See {@link #optimize()}.
+   * @throws IOException
+   */
+  public synchronized void forceOptimize() throws IOException {
+    flushRamSegments();
+    int minSegment = segmentInfos.size() - mergeFactor;
+    mergeSegments(segmentInfos, minSegment < 0 ? 0 : minSegment, segmentInfos.size());
+  }
+
   /*
    * Begin a transaction. During a transaction, any segment
    * merges that happen (or ram segments flushed) will not
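Note: forceOptimize() always performs the merge, even in cases where optimize() decides nothing needs doing, such as an already-optimized compound-file index that should be rewritten in the multi-file format; killNorms() below depends on this. A minimal usage sketch, assuming the patched IndexWriter above and the Lucene 2.1-era API; the demo class name is hypothetical:

    // ForceOptimizeDemo.java - illustrative only, not part of this patch
    import org.apache.lucene.analysis.SimpleAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class ForceOptimizeDemo {
      public static void main(String[] args) throws Exception {
        // args[0] is the path of an existing index
        Directory dir = FSDirectory.getDirectory(args[0], false);
        IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), false);
        writer.setUseCompoundFile(false); // the forced merge writes the multi-file format
        writer.forceOptimize();           // merges even if only one segment exists
        writer.close();
        dir.close();
      }
    }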
Index: contrib/miscellaneous/src/test/org/apache/lucene/index/TestFieldNormModifier.java
===================================================================
--- contrib/miscellaneous/src/test/org/apache/lucene/index/TestFieldNormModifier.java (revision 495178)
+++ contrib/miscellaneous/src/test/org/apache/lucene/index/TestFieldNormModifier.java (working copy)
@@ -18,23 +18,19 @@
  */
 
 import java.io.IOException;
-import java.util.Arrays;
 
 import junit.framework.TestCase;
 
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.HitCollector;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.HitCollector;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.analysis.SimpleAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
 
 /**
  * Tests changing of field norms with a custom similarity and with fake norms.
@@ -66,7 +62,7 @@
       Document d = new Document();
       d.add(new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED));
       d.add(new Field("nonorm", "word", Field.Store.YES, Field.Index.NO_NORMS));
-      d.add(new Field("untokfield", "20061212 20071212", Field.Store.YES, Field.Index.TOKENIZED));
+      d.add(new Field("untokfield", "20061212 20071212", Field.Store.YES, Field.Index.UN_TOKENIZED));
 
       for (int j = 1; j <= i; j++) {
         d.add(new Field("field", "crap", Field.Store.YES, Field.Index.TOKENIZED));
@@ -162,21 +158,54 @@
     }
   }
 
-  public void testNormKiller() throws IOException {
+  public void testTokenizedFieldNormKiller() throws IOException {
+    IndexReader r = IndexReader.open(store);
+    assertTrue(r.hasNorms("field"));
+    r.close();
+
+    FieldNormModifier fnm = new FieldNormModifier(store, null);
+    fnm.killNorms("field");
+    r = IndexReader.open(store);
+    assertFalse(r.hasNorms("field"));
+    r.close();
+
+    // verify that we still get documents in the same order as originally
+    IndexSearcher searcher = new IndexSearcher(store);
+    final float[] scores = new float[NUM_DOCS];
+    float lastScore = 0.0f;
+
+    // default similarity should return the same score for all documents for this query
+    searcher.search(new TermQuery(new Term("field", "word")), new HitCollector() {
+      public final void collect(int doc, float score) {
+        scores[doc] = score;
+      }
+    });
+    searcher.close();
+
+    lastScore = scores[0];
+    for (int i = 0; i < NUM_DOCS; i++) {
+      String msg = "i=" + i + ", " + scores[i] + " == " + lastScore;
+      assertTrue(msg, scores[i] == lastScore);
+      //System.out.println(msg);
+      lastScore = scores[i];
+    }
+  }
+
+  public void testUnTokenizedFieldNormKiller() throws IOException {
     IndexReader r = IndexReader.open(store);
-    byte[] oldNorms = r.norms("untokfield");
+//    byte[] oldNorms = r.norms("untokfield");
+    assertTrue(r.hasNorms("untokfield"));
     r.close();
 
-    FieldNormModifier fnm = new FieldNormModifier(store, s);
-    fnm.reSetNorms("untokfield");
+    FieldNormModifier fnm = new FieldNormModifier(store, null);
+    fnm.killNorms("untokfield");
     r = IndexReader.open(store);
-    byte[] newNorms = r.norms("untokfield");
+//    byte[] newNorms = r.norms("untokfield");
+    assertFalse(r.hasNorms("untokfield"));
     r.close();
 
-    assertFalse(Arrays.equals(oldNorms, newNorms));
-
     // verify that we still get documents in the same order as originally
     IndexSearcher searcher = new IndexSearcher(store);
     final float[] scores = new float[NUM_DOCS];
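Note: both new tests expect identical scores once norms are gone because DefaultSimilarity folds a per-document length factor into each score, and the test documents deliberately differ in length (document i adds i extra "crap" terms to "field"). For illustration, the length factor in Lucene core's DefaultSimilarity is:

    // DefaultSimilarity: shorter fields get a larger norm, so while norms
    // exist the test documents all score differently for the one-term query.
    public float lengthNorm(String fieldName, int numTerms) {
      return (float)(1.0 / Math.sqrt(numTerms));
    }

After killNorms() sets omitNorms and deletes the per-field norms files, every document gets the same default norm, so a one-term TermQuery must score all documents identically; that is what the scores[i] == lastScore loop asserts.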
Index: contrib/miscellaneous/src/java/org/apache/lucene/index/FieldNormModifier.java
===================================================================
--- contrib/miscellaneous/src/java/org/apache/lucene/index/FieldNormModifier.java (revision 495178)
+++ contrib/miscellaneous/src/java/org/apache/lucene/index/FieldNormModifier.java (working copy)
@@ -19,9 +19,11 @@
 
 import java.io.IOException;
 import java.util.Date;
 
+import org.apache.lucene.analysis.SimpleAnalyzer;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IndexOutput;
 
 /**
  * Given a directory and a list of fields, updates the fieldNorms in place for every document.
@@ -67,7 +69,10 @@
 
     for (int i = 2; i < args.length; i++) {
       System.out.print("Updating field: " + args[i] + " " + (new Date()).toString() + " ... ");
-      fnm.reSetNorms(args[i]);
+      if (s == null)
+        fnm.killNorms(args[i]);
+      else
+        fnm.reSetNorms(args[i]);
       System.out.println(new Date().toString());
     }
 
@@ -94,9 +99,8 @@
    * Resets the norms for the specified field.
    *
    * <p>
-   * Opens a new IndexReader on the Directory given to this instance,
-   * modifies the norms (either using the Similarity given to this instance, or by using fake norms,
-   * and closes the IndexReader.
+   * Opens a new IndexReader on the Directory given to this instance and
+   * modifies the norms using the Similarity specified in the call to the constructor.
   * </p>
   *
   * @param field the field whose norms should be reset
@@ -104,7 +108,6 @@
   public void reSetNorms(String field) throws IOException {
     String fieldName = field.intern();
     int[] termCounts = new int[0];
-    byte[] fakeNorms = new byte[0];
 
     IndexReader reader = null;
     TermEnum termEnum = null;
 
     try {
       reader = IndexReader.open(dir);
       termCounts = new int[reader.maxDoc()];
-      // if we are killing norms, get fake ones
-      if (sim == null)
-        fakeNorms = SegmentReader.createFakeNorms(reader.maxDoc());
       try {
         termEnum = reader.terms(new Term(field,""));
         try {
@@ -135,24 +135,87 @@
       } finally {
         if (null != termEnum) termEnum.close();
       }
+
+      for (int d = 0; d < termCounts.length; d++) {
+        if (! reader.isDeleted(d))
+          reader.setNorm(d, fieldName, sim.encodeNorm(sim.lengthNorm(fieldName, termCounts[d])));
+      }
     } finally {
       if (null != reader) reader.close();
     }
+  }
+
+  /**
+   * Removes norms for the given field. The index is optimized and expanded into
+   * a multi-file index first. After the norms are removed, the index is packed
+   * back into a compound index, if that was its original format.
+   * @param fieldName the field whose norms should be removed
+   * @throws IOException
+   */
+  public void killNorms(String fieldName) throws IOException {
+    // figure out if the index is a CFS index or not
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    SegmentInfo si = sis.info(0);
+    boolean isCompound = SegmentReader.usesCompoundFile(si);
+
+    // ensure there is only one segment, and that the index is expanded if it's CFS
+    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), false);
+    writer.setUseCompoundFile(false);
+    writer.forceOptimize();
+    writer.close();
+
+    // find the .fnm file
+    String[] files = dir.list();
+    for (int i = 0; i < files.length; i++) {
+      if (files[i].endsWith(".fnm")) {
+        String fnm = files[i];
+        System.out.println("FieldInfo file: " + fnm);
+        ChangeableFieldInfos cfi = new ChangeableFieldInfos(dir, fnm);
+        cfi.change(fieldName, true);
+        break; // only 1 .fnm per index
+      }
+    }
+
+    // if the index was a CFS index, pack it back
+    if (isCompound) {
+      writer = new IndexWriter(dir, new SimpleAnalyzer(), false);
+      writer.setUseCompoundFile(true);
+      writer.forceOptimize();
+      writer.close();
+    }
+  }
+
+  class ChangeableFieldInfos extends FieldInfos {
+    private Directory dir;
+    private String fileName;
 
-    try {
-      reader = IndexReader.open(dir);
-      for (int d = 0; d < termCounts.length; d++) {
-        if (! reader.isDeleted(d)) {
-          if (sim == null)
-            reader.setNorm(d, fieldName, fakeNorms[0]);
-          else
-            reader.setNorm(d, fieldName, sim.encodeNorm(sim.lengthNorm(fieldName, termCounts[d])));
+    ChangeableFieldInfos(Directory dir, String fileName) throws IOException {
+      super(dir, fileName);
+      this.dir = dir;
+      this.fileName = fileName;
+    }
+
+    void change(String fieldName, boolean omitNorms) throws IOException {
+      // set the omitNorms bit
+      FieldInfo fi = fieldInfo(fieldName);
+      fi.omitNorms = omitNorms;
+
+      System.out.println("Write file: " + fileName);
+      IndexOutput output = dir.createOutput(fileName);
+      write(output);
+      output.close();
+
+      // find and delete all .fN norms files for the field
+      int fieldNumber = fieldNumber(fieldName);
+      String[] files = dir.list();
+      for (int i = 0; i < files.length; i++) {
+        if (files[i].endsWith(".f" + fieldNumber)) {
+          String normsFile = files[i];
+          System.out.println("Removing field norms file: " + normsFile);
+          dir.deleteFile(normsFile);
        }
-      }
-
-    } finally {
-      if (null != reader) reader.close();
+      }
     }
   }
-
 }
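A minimal end-to-end sketch of the new killNorms() API, assuming the patched contrib classes above; the demo class, index path, and field name are hypothetical:

    // KillNormsDemo.java - illustrative only, not part of this patch
    import org.apache.lucene.index.FieldNormModifier;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class KillNormsDemo {
      public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.getDirectory("/path/to/index", false);
        // a null Similarity selects norm removal rather than recomputation
        FieldNormModifier fnm = new FieldNormModifier(dir, null);
        fnm.killNorms("body"); // hypothetical field name
        IndexReader r = IndexReader.open(dir);
        System.out.println("has norms: " + r.hasNorms("body")); // expected: false
        r.close();
        dir.close();
      }
    }

Because killNorms() force-merges the index twice when it started out in compound format (once to expand it, once to repack it), it needs free disk space for the expanded copy and should have exclusive access to the index while it runs.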