Index: solr/src/java/org/apache/solr/search/SolrIndexReader.java =================================================================== --- solr/src/java/org/apache/solr/search/SolrIndexReader.java (revision 1057282) +++ solr/src/java/org/apache/solr/search/SolrIndexReader.java (working copy) @@ -288,11 +288,6 @@ } @Override - public void norms(String f, byte[] bytes, int offset) throws IOException { - in.norms(f, bytes, offset); - } - - @Override protected void doSetNorm(int d, String f, byte b) throws CorruptIndexException, IOException { in.setNorm(d, f, b); } @@ -473,11 +468,6 @@ } @Override - public void setNorm(int doc, String field, float value) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { - in.setNorm(doc, field, value); - } - - @Override public void undeleteAll() throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { in.undeleteAll(); } Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1057282) +++ lucene/CHANGES.txt (working copy) @@ -152,6 +152,17 @@ without any changes to the index will not cause any index version increment. (Simon Willnauer, Mike McCandless) +* LUCENE-2846: omitNorms now behaves like omitTermFreqAndPositions, if you + omitNorms(true) for field "a" for 1000 documents, but then add a document with + omitNorms(false) for field "a", all documents for field "a" will have no norms. + Previously, Lucene would fill the first 1000 documents with "fake norms" from + Similarity.getDefault(). (Robert Muir, Mike McCandless) + +* LUCENE-2846: When some documents contain field "a", and others do not, the + documents that don't have the field get a norm byte value of 0. Previously, Lucene + would populate "fake norms" with Similarity.getDefault() for these documents. 
+ (Robert Muir, Mike McCandless) + API Changes * LUCENE-2302, LUCENE-1458, LUCENE-2111, LUCENE-2514: Terms are no longer @@ -189,6 +200,14 @@ * LUCENE-2778: RAMDirectory now exposes newRAMFile() which allows to override and return a different RAMFile implementation. (Shai Erera) +* LUCENE-2846: Remove the deprecated IndexReader.setNorm(int, String, float). + This method was only syntactic sugar for setNorm(int, String, byte), but + using the global Similarity.getDefault().encodeNormValue. Use the byte-based + method instead to ensure that the norm is encoded with your Similarity. + Also removed norms(String, byte[], int), which was only used by MultiReader + for building top-level norms. If you really need top-level norms, use + MultiNorms or SlowMultiReaderWrapper. (Robert Muir, Mike McCandless) + New features * LUCENE-2604: Added RegexpQuery support to QueryParser. Regular expressions Index: lucene/src/test/org/apache/lucene/search/TestSetNorm.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestSetNorm.java (revision 1057282) +++ lucene/src/test/org/apache/lucene/search/TestSetNorm.java (working copy) @@ -50,10 +50,10 @@ // reset the boost of each instance of this document IndexReader reader = IndexReader.open(store, false); - reader.setNorm(0, "field", 1.0f); - reader.setNorm(1, "field", 2.0f); - reader.setNorm(2, "field", 4.0f); - reader.setNorm(3, "field", 16.0f); + reader.setNorm(0, "field", Similarity.getDefault().encodeNormValue(1.0f)); + reader.setNorm(1, "field", Similarity.getDefault().encodeNormValue(2.0f)); + reader.setNorm(2, "field", Similarity.getDefault().encodeNormValue(4.0f)); + reader.setNorm(3, "field", Similarity.getDefault().encodeNormValue(16.0f)); reader.close(); // check that searches are ordered by this boost Index: lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java =================================================================== --- 
lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java (revision 1057282) +++ lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java (working copy) @@ -69,7 +69,7 @@ assertEquals("one", ir.document(hits[2].doc).get("key")); // change norm & retest - ir.setNorm(0, "key", 400f); + ir.setNorm(0, "key", Similarity.getDefault().encodeNormValue(400f)); normsQuery = new MatchAllDocsQuery("key"); hits = is.search(normsQuery, null, 1000).scoreDocs; assertEquals(3, hits.length); Index: lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java (revision 1057282) +++ lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java (working copy) @@ -276,7 +276,7 @@ IndexReader pr1Clone = (IndexReader) r1.clone(); pr1Clone.deleteDocument(10); - pr1Clone.setNorm(4, "field1", 0.5f); + pr1Clone.setNorm(4, "field1", Similarity.getDefault().encodeNormValue(0.5f)); assertTrue(Similarity.getDefault().decodeNormValue(MultiNorms.norms(r1, "field1")[4]) == norm1); assertTrue(Similarity.getDefault().decodeNormValue(MultiNorms.norms(pr1Clone, "field1")[4]) != norm1); @@ -327,7 +327,7 @@ TestIndexReaderReopen.createIndex(random, dir1, false); SegmentReader origSegmentReader = getOnlySegmentReader(IndexReader.open(dir1, false)); origSegmentReader.deleteDocument(1); - origSegmentReader.setNorm(4, "field1", 0.5f); + origSegmentReader.setNorm(4, "field1", Similarity.getDefault().encodeNormValue(0.5f)); SegmentReader clonedSegmentReader = (SegmentReader) origSegmentReader .clone(); @@ -426,7 +426,7 @@ final Directory dir1 = newDirectory(); TestIndexReaderReopen.createIndex(random, dir1, false); IndexReader orig = IndexReader.open(dir1, false); - orig.setNorm(1, "field1", 17.0f); + orig.setNorm(1, "field1", Similarity.getDefault().encodeNormValue(17.0f)); final byte encoded = Similarity.getDefault().encodeNormValue(17.0f); 
assertEquals(encoded, MultiNorms.norms(orig, "field1")[1]); Index: lucene/src/test/org/apache/lucene/index/TestIndexReader.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexReader.java (revision 1057282) +++ lucene/src/test/org/apache/lucene/index/TestIndexReader.java (working copy) @@ -43,6 +43,7 @@ import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Similarity; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; @@ -463,7 +464,7 @@ } try { - reader.setNorm(5, "aaa", 2.0f); + reader.setNorm(5, "aaa", Similarity.getDefault().encodeNormValue(2.0f)); fail("setNorm after close failed to throw IOException"); } catch (AlreadyClosedException e) { // expected @@ -503,7 +504,7 @@ } try { - reader.setNorm(5, "aaa", 2.0f); + reader.setNorm(5, "aaa", Similarity.getDefault().encodeNormValue(2.0f)); fail("setNorm should have hit LockObtainFailedException"); } catch (LockObtainFailedException e) { // expected @@ -533,7 +534,7 @@ // now open reader & set norm for doc 0 IndexReader reader = IndexReader.open(dir, false); - reader.setNorm(0, "content", (float) 2.0); + reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(2.0f)); // we should be holding the write lock now: assertTrue("locked", IndexWriter.isLocked(dir)); @@ -547,7 +548,7 @@ IndexReader reader2 = IndexReader.open(dir, false); // set norm again for doc 0 - reader.setNorm(0, "content", (float) 3.0); + reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(3.0f)); assertTrue("locked", IndexWriter.isLocked(dir)); reader.close(); @@ -580,12 +581,12 @@ // now open reader & set norm for doc 0 (writes to // _0_1.s0) reader = IndexReader.open(dir, false); - reader.setNorm(0, "content", (float) 2.0); + reader.setNorm(0, 
"content", Similarity.getDefault().encodeNormValue(2.0f)); reader.close(); // now open reader again & set norm for doc 0 (writes to _0_2.s0) reader = IndexReader.open(dir, false); - reader.setNorm(0, "content", (float) 2.0); + reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(2.0f)); reader.close(); assertFalse("failed to remove first generation norms file on writing second generation", dir.fileExists("_0_1.s0")); @@ -953,7 +954,7 @@ int docId = 12; for(int i=0;i<13;i++) { reader.deleteDocument(docId); - reader.setNorm(docId, "content", (float) 2.0); + reader.setNorm(docId, "content", Similarity.getDefault().encodeNormValue(2.0f)); docId += 12; } } @@ -1112,7 +1113,7 @@ reader = IndexReader.open(dir, false); try { - reader.setNorm(1, "content", (float) 2.0); + reader.setNorm(1, "content", Similarity.getDefault().encodeNormValue(2.0f)); fail("did not hit exception when calling setNorm on an invalid doc number"); } catch (ArrayIndexOutOfBoundsException e) { // expected Index: lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java (revision 1057282) +++ lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java (working copy) @@ -18,6 +18,7 @@ */ import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.search.Similarity; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -68,7 +69,7 @@ assertEquals("didn't delete the right number of documents", 1, delCount); // Set one norm so we get a .s0 file: - reader.setNorm(21, "content", (float) 1.5); + reader.setNorm(21, "content", Similarity.getDefault().encodeNormValue(1.5f)); reader.close(); // Now, artificially create an extra .del file & extra Index: lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java 
=================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java (revision 1057282) +++ lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java (working copy) @@ -203,11 +203,11 @@ IndexReader reader4C = (IndexReader) reader3C.clone(); SegmentReader segmentReader4C = getOnlySegmentReader(reader4C); assertEquals(4, reader3CCNorm.bytesRef().get()); - reader4C.setNorm(5, "field1", 0.33f); + reader4C.setNorm(5, "field1", Similarity.getDefault().encodeNormValue(0.33f)); // generate a cannot update exception in reader1 try { - reader3C.setNorm(1, "field1", 0.99f); + reader3C.setNorm(1, "field1", Similarity.getDefault().encodeNormValue(0.99f)); fail("did not hit expected exception"); } catch (Exception ex) { // expected @@ -223,7 +223,7 @@ IndexReader reader5C = (IndexReader) reader4C.clone(); SegmentReader segmentReader5C = getOnlySegmentReader(reader5C); Norm reader5CCNorm = segmentReader5C.norms.get("field1"); - reader5C.setNorm(5, "field1", 0.7f); + reader5C.setNorm(5, "field1", Similarity.getDefault().encodeNormValue(0.7f)); assertEquals(1, reader5CCNorm.bytesRef().get()); reader5C.close(); @@ -256,8 +256,8 @@ // System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm); modifiedNorms.set(i, Float.valueOf(newNorm)); modifiedNorms.set(k, Float.valueOf(origNorm)); - ir.setNorm(i, "f" + 1, newNorm); - ir.setNorm(k, "f" + 1, origNorm); + ir.setNorm(i, "f" + 1, Similarity.getDefault().encodeNormValue(newNorm)); + ir.setNorm(k, "f" + 1, Similarity.getDefault().encodeNormValue(origNorm)); // System.out.println("setNorm i: "+i); // break; } Index: lucene/src/test/org/apache/lucene/index/TestOmitNorms.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestOmitNorms.java (revision 0) +++ lucene/src/test/org/apache/lucene/index/TestOmitNorms.java (revision 0) @@ -0,0 +1,304 @@ +package org.apache.lucene.index; + 
+/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.search.Similarity; +import org.apache.lucene.store.Directory; + +public class TestOmitNorms extends LuceneTestCase { + // Tests whether the DocumentWriter correctly enables the + // omitNorms bit in the FieldInfo + public void testOmitNorms() throws Exception { + Directory ram = newDirectory(); + Analyzer analyzer = new MockAnalyzer(); + IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); + Document d = new Document(); + + // this field will have norms + Field f1 = newField("f1", "This field has norms", Field.Store.NO, Field.Index.ANALYZED); + d.add(f1); + + // this field will NOT have norms + Field f2 = newField("f2", "This field has NO norms in all docs", Field.Store.NO, Field.Index.ANALYZED); + f2.setOmitNorms(true); + d.add(f2); + + writer.addDocument(d); + writer.optimize(); + // now we add 
another document which has norms for field f2 and not for f1 and verify if the SegmentMerger + // keeps things constant + d = new Document(); + + // Reverse + f1.setOmitNorms(true); + d.add(f1); + + f2.setOmitNorms(false); + d.add(f2); + + writer.addDocument(d); + + // force merge + writer.optimize(); + // flush + writer.close(); + _TestUtil.checkIndex(ram); + + SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false)); + FieldInfos fi = reader.fieldInfos(); + assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f1").omitNorms); + assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitNorms); + + reader.close(); + ram.close(); + } + + // Tests whether merging of docs that have different + // omitNorms for the same field works + public void testMixedMerge() throws Exception { + Directory ram = newDirectory(); + Analyzer analyzer = new MockAnalyzer(); + IndexWriter writer = new IndexWriter( + ram, + newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer). + setMaxBufferedDocs(3). 
+ setMergePolicy(newLogMergePolicy(2)) + ); + Document d = new Document(); + + // this field will have norms + Field f1 = newField("f1", "This field has norms", Field.Store.NO, Field.Index.ANALYZED); + d.add(f1); + + // this field will NOT have norms + Field f2 = newField("f2", "This field has NO norms in all docs", Field.Store.NO, Field.Index.ANALYZED); + f2.setOmitNorms(true); + d.add(f2); + + for (int i = 0; i < 30; i++) { + writer.addDocument(d); + } + + // now we add another document which has norms for field f2 and not for f1 and verify if the SegmentMerger + // keeps things constant + d = new Document(); + + // Reverse + f1.setOmitNorms(true); + d.add(f1); + + f2.setOmitNorms(false); + d.add(f2); + + for (int i = 0; i < 30; i++) { + writer.addDocument(d); + } + + // force merge + writer.optimize(); + // flush + writer.close(); + + _TestUtil.checkIndex(ram); + + SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false)); + FieldInfos fi = reader.fieldInfos(); + assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f1").omitNorms); + assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitNorms); + + reader.close(); + ram.close(); + } + + // Make sure first adding docs that do not omitNorms for + // field X, then adding docs that do omitNorms for that same + // field, works correctly. + public void testMixedRAM() throws Exception { + Directory ram = newDirectory(); + Analyzer analyzer = new MockAnalyzer(); + IndexWriter writer = new IndexWriter( + ram, + newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer). + setMaxBufferedDocs(10). 
+ setMergePolicy(newLogMergePolicy(2)) + ); + Document d = new Document(); + + // this field will have norms + Field f1 = newField("f1", "This field has norms", Field.Store.NO, Field.Index.ANALYZED); + d.add(f1); + + // this field will NOT have norms + Field f2 = newField("f2", "This field has NO norms in all docs", Field.Store.NO, Field.Index.ANALYZED); + d.add(f2); + + for (int i = 0; i < 5; i++) { + writer.addDocument(d); + } + + f2.setOmitNorms(true); + + for (int i = 0; i < 20; i++) { + writer.addDocument(d); + } + + // force merge + writer.optimize(); + + // flush + writer.close(); + + _TestUtil.checkIndex(ram); + + SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false)); + FieldInfos fi = reader.fieldInfos(); + assertTrue("OmitNorms field bit should not be set.", !fi.fieldInfo("f1").omitNorms); + assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitNorms); + + reader.close(); + ram.close(); + } + + private void assertNoNrm(Directory dir) throws Throwable { + final String[] files = dir.listAll(); + for (int i = 0; i < files.length; i++) { + assertFalse(files[i].endsWith(".nrm")); + } + } + + // Verifies no *.nrm exists when all fields omit norms: + public void testNoNrmFile() throws Throwable { + Directory ram = newDirectory(); + Analyzer analyzer = new MockAnalyzer(); + IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig( + TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(3)); + writer.setInfoStream(VERBOSE ? 
System.out : null); + LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); + lmp.setMergeFactor(2); + lmp.setUseCompoundFile(false); + Document d = new Document(); + + Field f1 = newField("f1", "This field has no norms", Field.Store.NO, Field.Index.ANALYZED); + f1.setOmitNorms(true); + d.add(f1); + + for (int i = 0; i < 30; i++) { + writer.addDocument(d); + } + + writer.commit(); + + assertNoNrm(ram); + + // force merge + writer.optimize(); + // flush + writer.close(); + + assertNoNrm(ram); + _TestUtil.checkIndex(ram); + ram.close(); + } + + /** + * Tests various combinations of omitNorms=true/false, the field not existing at all, + * ensuring that only omitNorms is 'viral'. + * Internally checks that MultiNorms.norms() is consistent (returns the same bytes) + * as the optimized equivalent. + */ + public void testOmitNormsCombos() throws IOException { + // indexed with norms + Field norms = new Field("foo", "a", Field.Store.YES, Field.Index.ANALYZED); + // indexed without norms + Field noNorms = new Field("foo", "a", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS); + // not indexed, but stored + Field noIndex = new Field("foo", "a", Field.Store.YES, Field.Index.NO); + // not indexed but stored, omitNorms is set + Field noNormsNoIndex = new Field("foo", "a", Field.Store.YES, Field.Index.NO); + noNormsNoIndex.setOmitNorms(true); + // not indexed nor stored (doesnt exist at all, we index a different field instead) + Field emptyNorms = new Field("bar", "a", Field.Store.YES, Field.Index.ANALYZED); + + assertNotNull(getNorms("foo", norms, norms)); + assertNull(getNorms("foo", norms, noNorms)); + assertNotNull(getNorms("foo", norms, noIndex)); + assertNotNull(getNorms("foo", norms, noNormsNoIndex)); + assertNotNull(getNorms("foo", norms, emptyNorms)); + assertNull(getNorms("foo", noNorms, noNorms)); + assertNull(getNorms("foo", noNorms, noIndex)); + assertNull(getNorms("foo", noNorms, noNormsNoIndex)); + assertNull(getNorms("foo", noNorms, 
emptyNorms)); + assertNull(getNorms("foo", noIndex, noIndex)); + assertNull(getNorms("foo", noIndex, noNormsNoIndex)); + assertNull(getNorms("foo", noIndex, emptyNorms)); + assertNull(getNorms("foo", noNormsNoIndex, noNormsNoIndex)); + assertNull(getNorms("foo", noNormsNoIndex, emptyNorms)); + assertNull(getNorms("foo", emptyNorms, emptyNorms)); + } + + /** + * Indexes at least 1 document with f1, and at least 1 document with f2. + * returns the norms for "field". + */ + static byte[] getNorms(String field, Field f1, Field f2) throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); + RandomIndexWriter riw = new RandomIndexWriter(random, dir, iwc); + + // add f1 + Document d = new Document(); + d.add(f1); + riw.addDocument(d); + + // add f2 + d = new Document(); + d.add(f2); + riw.addDocument(d); + + // add a mix of f1's and f2's + int numExtraDocs = _TestUtil.nextInt(random, 1, 1000); + for (int i = 0; i < numExtraDocs; i++) { + d = new Document(); + d.add(random.nextBoolean() ? f1 : f2); + riw.addDocument(d); + } + + IndexReader ir1 = riw.getReader(); + byte[] norms1 = MultiNorms.norms(ir1, field); + + // optimize and validate MultiNorms against single segment. 
+ riw.optimize(); + IndexReader ir2 = riw.getReader(); + byte[] norms2 = ir2.getSequentialSubReaders()[0].norms(field); + + assertArrayEquals(norms1, norms2); + ir1.close(); + ir2.close(); + riw.close(); + dir.close(); + return norms1; + } +} Property changes on: lucene\src\test\org\apache\lucene\index\TestOmitNorms.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java (revision 1057282) +++ lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java (working copy) @@ -37,6 +37,7 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Similarity; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; @@ -614,8 +615,8 @@ IndexReader reader2 = reader1.reopen(); modifier = IndexReader.open(dir1, false); - modifier.setNorm(1, "field1", 50); - modifier.setNorm(1, "field2", 50); + modifier.setNorm(1, "field1", Similarity.getDefault().encodeNormValue(50f)); + modifier.setNorm(1, "field2", Similarity.getDefault().encodeNormValue(50f)); modifier.close(); IndexReader reader3 = reader2.reopen(); @@ -708,7 +709,7 @@ protected void modifyIndex(int i) throws IOException { if (i % 3 == 0) { IndexReader modifier = IndexReader.open(dir, false); - modifier.setNorm(i, "field1", 50); + modifier.setNorm(i, "field1", Similarity.getDefault().encodeNormValue(50f)); modifier.close(); } else if (i % 3 == 1) { IndexReader modifier = IndexReader.open(dir, false); @@ -984,9 +985,9 @@ } case 1: { IndexReader reader = IndexReader.open(dir, false); - reader.setNorm(4, "field1", 123); - reader.setNorm(44, "field2", 222); - 
reader.setNorm(44, "field4", 22); + reader.setNorm(4, "field1", Similarity.getDefault().encodeNormValue(123f)); + reader.setNorm(44, "field2", Similarity.getDefault().encodeNormValue(222f)); + reader.setNorm(44, "field4", Similarity.getDefault().encodeNormValue(22f)); reader.close(); break; } @@ -1007,8 +1008,8 @@ } case 4: { IndexReader reader = IndexReader.open(dir, false); - reader.setNorm(5, "field1", 123); - reader.setNorm(55, "field2", 222); + reader.setNorm(5, "field1", Similarity.getDefault().encodeNormValue(123f)); + reader.setNorm(55, "field2", Similarity.getDefault().encodeNormValue(222f)); reader.close(); break; } Index: lucene/src/test/org/apache/lucene/index/TestParallelReader.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestParallelReader.java (revision 1057282) +++ lucene/src/test/org/apache/lucene/index/TestParallelReader.java (working copy) @@ -147,7 +147,7 @@ assertTrue(pr.isCurrent()); IndexReader modifier = IndexReader.open(dir1, false); - modifier.setNorm(0, "f1", 100); + modifier.setNorm(0, "f1", Similarity.getDefault().encodeNormValue(100f)); modifier.close(); // one of the two IndexReaders which ParallelReader is using @@ -155,7 +155,7 @@ assertFalse(pr.isCurrent()); modifier = IndexReader.open(dir2, false); - modifier.setNorm(0, "f3", 100); + modifier.setNorm(0, "f3", Similarity.getDefault().encodeNormValue(100f)); modifier.close(); // now both are not current anymore Index: lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java (revision 1057282) +++ lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java (working copy) @@ -30,6 +30,7 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Similarity; import 
org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -607,7 +608,7 @@ writer.close(); IndexReader reader = IndexReader.open(dir, policy, false); reader.deleteDocument(3*i+1); - reader.setNorm(4*i+1, "content", 2.0F); + reader.setNorm(4*i+1, "content", Similarity.getDefault().encodeNormValue(2.0F)); IndexSearcher searcher = new IndexSearcher(reader); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; assertEquals(16*(1+i), hits.length); @@ -715,7 +716,7 @@ writer.close(); IndexReader reader = IndexReader.open(dir, policy, false); reader.deleteDocument(3); - reader.setNorm(5, "content", 2.0F); + reader.setNorm(5, "content", Similarity.getDefault().encodeNormValue(2.0F)); IndexSearcher searcher = new IndexSearcher(reader); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; assertEquals(16, hits.length); Index: lucene/src/test/org/apache/lucene/index/TestSegmentReader.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestSegmentReader.java (revision 1057282) +++ lucene/src/test/org/apache/lucene/index/TestSegmentReader.java (working copy) @@ -27,7 +27,6 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; -import org.apache.lucene.search.Similarity; import org.apache.lucene.store.Directory; public class TestSegmentReader extends LuceneTestCase { @@ -180,15 +179,9 @@ assertEquals(reader.hasNorms(f.name()), !f.getOmitNorms()); assertEquals(reader.hasNorms(f.name()), !DocHelper.noNorms.containsKey(f.name())); if (!reader.hasNorms(f.name())) { - // test for fake norms of 1.0 or null depending on the flag + // test for norms of null byte [] norms = MultiNorms.norms(reader, f.name()); - byte norm1 = Similarity.getDefault().encodeNormValue(1.0f); assertNull(norms); - norms = new byte[reader.maxDoc()]; - MultiNorms.norms(reader, f.name(),norms, 0); - for (int j=0; j - * 
WARNING: If you override this method, you should change the default - * Similarity to your implementation with {@link Similarity#setDefault(Similarity)}. - * Otherwise, your method may not always be called, especially if you omit norms - * for some fields. * @see #encodeNormValue(float) */ public float decodeNormValue(byte b) { @@ -662,11 +657,6 @@ * are rounded down to the largest representable value. Positive values too * small to represent are rounded up to the smallest positive representable * value. - *

- * WARNING: If you override this method, you should change the default - * Similarity to your implementation with {@link Similarity#setDefault(Similarity)}. - * Otherwise, your method may not always be called, especially if you omit norms - * for some fields. * @see org.apache.lucene.document.Field#setBoost(float) * @see org.apache.lucene.util.SmallFloat */ Index: lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java =================================================================== --- lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java (revision 1057282) +++ lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java (working copy) @@ -18,7 +18,6 @@ */ import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.search.Similarity; /** Taps into DocInverter, as an InvertedDocEndConsumer, * which is called at the end of inverting each field. We Index: lucene/src/java/org/apache/lucene/index/MultiReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/MultiReader.java (revision 1057282) +++ lucene/src/java/org/apache/lucene/index/MultiReader.java (working copy) @@ -306,12 +306,6 @@ } @Override - public synchronized void norms(String field, byte[] result, int offset) - throws IOException { - throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms"); - } - - @Override protected void doSetNorm(int n, String field, byte value) throws CorruptIndexException, IOException { int i = readerIndex(n); // find segment num Index: lucene/src/java/org/apache/lucene/index/FieldInfos.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FieldInfos.java (revision 1057282) +++ lucene/src/java/org/apache/lucene/index/FieldInfos.java (working copy) @@ -278,16 +278,23 @@ } public boolean hasVectors() { - boolean 
hasVectors = false; for (int i = 0; i < size(); i++) { if (fieldInfo(i).storeTermVector) { - hasVectors = true; - break; + return true; } } - return hasVectors; + return false; } + public boolean hasNorms() { + for (int i = 0; i < size(); i++) { + if (!fieldInfo(i).omitNorms) { + return true; + } + } + return false; + } + public void write(Directory d, String name) throws IOException { IndexOutput output = d.createOutput(name); try { Index: lucene/src/java/org/apache/lucene/index/DirectoryReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DirectoryReader.java (revision 1057282) +++ lucene/src/java/org/apache/lucene/index/DirectoryReader.java (working copy) @@ -602,12 +602,6 @@ } @Override - public synchronized void norms(String field, byte[] result, int offset) - throws IOException { - throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms"); - } - - @Override protected void doSetNorm(int n, String field, byte value) throws CorruptIndexException, IOException { int i = readerIndex(n); // find segment num Index: lucene/src/java/org/apache/lucene/index/FilterIndexReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (revision 1057282) +++ lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (working copy) @@ -351,12 +351,6 @@ } @Override - public void norms(String f, byte[] bytes, int offset) throws IOException { - ensureOpen(); - in.norms(f, bytes, offset); - } - - @Override protected void doSetNorm(int d, String f, byte b) throws CorruptIndexException, IOException { in.setNorm(d, f, b); } Index: lucene/src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SegmentMerger.java 
(revision 1057282) +++ lucene/src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.List; @@ -570,13 +571,6 @@ } private void mergeNorms() throws IOException { - // get needed buffer size by finding the largest segment - int bufferSize = 0; - for (IndexReader reader : readers) { - bufferSize = Math.max(bufferSize, reader.maxDoc()); - } - - byte[] normBuffer = null; IndexOutput output = null; try { for (int i = 0, numFieldInfos = fieldInfos.size(); i < numFieldInfos; i++) { @@ -586,12 +580,15 @@ output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION)); output.writeBytes(NORMS_HEADER,NORMS_HEADER.length); } - if (normBuffer == null) { - normBuffer = new byte[bufferSize]; - } for (IndexReader reader : readers) { final int maxDoc = reader.maxDoc(); - reader.norms(fi.name, normBuffer, 0); + byte normBuffer[] = reader.norms(fi.name); + if (normBuffer == null) { + // Can be null if this segment doesn't have + // any docs with this field + normBuffer = new byte[maxDoc]; + Arrays.fill(normBuffer, (byte)0); + } if (!reader.hasDeletions()) { //optimized case for segments without deleted docs output.writeBytes(normBuffer, maxDoc); Index: lucene/src/java/org/apache/lucene/index/ParallelReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/ParallelReader.java (revision 1057282) +++ lucene/src/java/org/apache/lucene/index/ParallelReader.java (working copy) @@ -21,11 +21,8 @@ import org.apache.lucene.document.FieldSelector; import org.apache.lucene.document.FieldSelectorResult; import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.util.Bits; -import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.search.FieldCache; // 
not great (circular); used only to purge FieldCache entry on close -import org.apache.lucene.search.Similarity; import org.apache.lucene.util.BytesRef; import java.io.IOException; @@ -439,6 +436,8 @@ return bytes; if (!hasNorms(field)) return null; + if (normsCache.containsKey(field)) // cached omitNorms, not missing key + return null; bytes = MultiNorms.norms(reader, field); normsCache.put(field, bytes); @@ -446,23 +445,6 @@ } @Override - public synchronized void norms(String field, byte[] result, int offset) - throws IOException { - // TODO: maybe optimize - ensureOpen(); - IndexReader reader = fieldToReader.get(field); - if (reader==null) - return; - - byte[] norms = norms(field); - if (norms == null) { - Arrays.fill(result, offset, result.length, Similarity.getDefault().encodeNormValue(1.0f)); - } else { - System.arraycopy(norms, 0, result, offset, maxDoc()); - } - } - - @Override protected void doSetNorm(int n, String field, byte value) throws CorruptIndexException, IOException { IndexReader reader = fieldToReader.get(field); Index: lucene/src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SegmentReader.java (revision 1057282) +++ lucene/src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -19,7 +19,6 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; @@ -31,7 +30,6 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.search.Similarity; import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; @@ -991,22 +989,6 @@ norm.copyOnWrite()[doc] = value; // set the value } - /** Read norms into a pre-allocated array. 
*/ - @Override - public synchronized void norms(String field, byte[] bytes, int offset) - throws IOException { - - ensureOpen(); - Norm norm = norms.get(field); - if (norm == null) { - Arrays.fill(bytes, offset, bytes.length, Similarity.getDefault().encodeNormValue(1.0f)); - return; - } - - norm.bytes(bytes, offset, maxDoc()); - } - - private void openNorms(Directory cfsDir, int readBufferSize) throws IOException { long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now) int maxDoc = maxDoc(); Index: lucene/src/java/org/apache/lucene/index/FieldInfo.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FieldInfo.java (revision 1057282) +++ lucene/src/java/org/apache/lucene/index/FieldInfo.java (working copy) @@ -52,7 +52,7 @@ this.storeOffsetWithTermVector = false; this.storePositionWithTermVector = false; this.storePayloads = false; - this.omitNorms = true; + this.omitNorms = false; this.omitTermFreqAndPositions = false; } } @@ -82,7 +82,7 @@ this.storePayloads = true; } if (this.omitNorms != omitNorms) { - this.omitNorms = false; // once norms are stored, always store + this.omitNorms = true; // if one require omitNorms at least once, it remains off for life } if (this.omitTermFreqAndPositions != omitTermFreqAndPositions) { this.omitTermFreqAndPositions = true; // if one require omitTermFreqAndPositions at least once, it remains off for life Index: lucene/src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexReader.java (revision 1057282) +++ lucene/src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -931,14 +931,6 @@ */ public abstract byte[] norms(String field) throws IOException; - /** Reads the byte-encoded normalization factor for the named field of every - * document. This is used by the search code to score documents. 
- * - * @see org.apache.lucene.document.Field#setBoost(float) - */ - public abstract void norms(String field, byte[] bytes, int offset) - throws IOException; - /** Expert: Resets the normalization factor for the named field of the named * document. The norm represents the product of the field's {@link * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String, @@ -970,26 +962,6 @@ protected abstract void doSetNorm(int doc, String field, byte value) throws CorruptIndexException, IOException; - /** Expert: Resets the normalization factor for the named field of the named - * document. - * - * @see #norms(String) - * @see Similarity#decodeNormValue(byte) - * - * @throws StaleReaderException if the index has changed - * since this reader was opened - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if there is a low-level IO error - */ - public void setNorm(int doc, String field, float value) - throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { - ensureOpen(); - setNorm(doc, field, Similarity.getDefault().encodeNormValue(value)); - } - /** Flex API: returns {@link Fields} for this reader. * This method may return null if the reader has no * postings. 
Index: lucene/src/java/org/apache/lucene/index/CheckIndex.java =================================================================== --- lucene/src/java/org/apache/lucene/index/CheckIndex.java (revision 1057282) +++ lucene/src/java/org/apache/lucene/index/CheckIndex.java (working copy) @@ -548,10 +548,10 @@ if (infoStream != null) { infoStream.print(" test: field norms........."); } - final byte[] b = new byte[reader.maxDoc()]; + byte[] b; for (final String fieldName : fieldNames) { if (reader.hasNorms(fieldName)) { - reader.norms(fieldName, b, 0); + b = reader.norms(fieldName); ++status.totFields; } } Index: lucene/src/java/org/apache/lucene/index/Fields.java =================================================================== --- lucene/src/java/org/apache/lucene/index/Fields.java (revision 1057282) +++ lucene/src/java/org/apache/lucene/index/Fields.java (working copy) @@ -28,7 +28,7 @@ * names. This will not return null. */ public abstract FieldsEnum iterator() throws IOException; - /** Get the {@link Terms} for this field. This may return + /** Get the {@link Terms} for this field. This will return * null if the field does not exist. 
*/ public abstract Terms terms(String field) throws IOException; Index: lucene/src/java/org/apache/lucene/index/MultiNorms.java =================================================================== --- lucene/src/java/org/apache/lucene/index/MultiNorms.java (revision 1057282) +++ lucene/src/java/org/apache/lucene/index/MultiNorms.java (working copy) @@ -22,7 +22,6 @@ import java.util.Arrays; import java.util.List; -import org.apache.lucene.search.Similarity; import org.apache.lucene.util.ReaderUtil; /** @@ -61,26 +60,24 @@ ReaderUtil.gatherSubReaders(leaves, r); int end = 0; for (IndexReader leaf : leaves) { + Fields fields = leaf.fields(); + boolean hasField = (fields != null && fields.terms(field) != null); + int start = end; - leaf.norms(field, norms, start); + byte leafNorms[] = leaf.norms(field); + if (leafNorms == null) { + if (hasField) { // omitted norms + return null; + } + // doesn't have field, fill bytes + leafNorms = new byte[leaf.maxDoc()]; + Arrays.fill(leafNorms, (byte) 0); + } + + System.arraycopy(leafNorms, 0, norms, start, leafNorms.length); end += leaf.maxDoc(); } return norms; } } - - /** - * Warning: this is heavy! Do not use in a loop, or implement norms() - * in your own reader with this (you should likely cache the result). 
- */ - public static void norms(IndexReader r, String field, byte[] bytes, int offset) - throws IOException { - // TODO: optimize more maybe - byte[] norms = norms(r, field); - if (norms == null) { - Arrays.fill(bytes, offset, bytes.length, Similarity.getDefault().encodeNormValue(1.0f)); - } else { - System.arraycopy(norms, 0, bytes, offset, r.maxDoc()); - } - } } Index: lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java (revision 1057282) +++ lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java (working copy) @@ -18,19 +18,14 @@ */ import java.io.IOException; -import java.util.Arrays; import java.util.HashMap; -import java.util.List; -import java.util.ArrayList; import java.util.Map; -import org.apache.lucene.search.Similarity; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ReaderUtil; // javadoc import org.apache.lucene.index.DirectoryReader; // javadoc import org.apache.lucene.index.MultiReader; // javadoc -import org.apache.lucene.index.IndexReader.ReaderContext; /** * This class forces a composite reader (eg a {@link @@ -88,23 +83,13 @@ return bytes; if (!hasNorms(field)) return null; - + if (normsCache.containsKey(field)) // cached omitNorms, not missing key + return null; + bytes = MultiNorms.norms(in, field); normsCache.put(field, bytes); return bytes; } - - @Override - public synchronized void norms(String field, byte[] bytes, int offset) throws IOException { - // TODO: maybe optimize - ensureOpen(); - byte[] norms = norms(field); - if (norms == null) { - Arrays.fill(bytes, offset, bytes.length, Similarity.getDefault().encodeNormValue(1.0f)); - } else { - System.arraycopy(norms, 0, bytes, offset, maxDoc()); - } - } @Override public ReaderContext getTopReaderContext() { Index: lucene/src/java/org/apache/lucene/index/NormsWriter.java 
=================================================================== --- lucene/src/java/org/apache/lucene/index/NormsWriter.java (revision 1057282) +++ lucene/src/java/org/apache/lucene/index/NormsWriter.java (working copy) @@ -26,7 +26,6 @@ import java.util.ArrayList; import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.search.Similarity; // TODO FI: norms could actually be stored as doc store @@ -37,7 +36,6 @@ final class NormsWriter extends InvertedDocEndConsumer { - private final byte defaultNorm = Similarity.getDefault().encodeNormValue(1.0f); private FieldInfos fieldInfos; @Override public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) { @@ -62,6 +60,10 @@ final Map<FieldInfo,List<NormsWriterPerField>> byField = new HashMap<FieldInfo,List<NormsWriterPerField>>(); + if (!fieldInfos.hasNorms()) { + return; + } + // Typically, each thread will have encountered the same // field. So first we collate by field, ie, all // per-thread field instances that correspond to the @@ -137,7 +139,7 @@ // Fill hole for(;upto>(getIndex().getNormsByFieldNameAndDocumentNumber().size()); Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java =================================================================== --- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java (revision 1057282) +++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java (working copy) @@ -201,9 +201,9 @@ byte[] oldNorms = index.getNormsByFieldNameAndDocumentNumber().get(field); if (oldNorms != null) { System.arraycopy(oldNorms, 0, norms, 0, oldNorms.length); - Arrays.fill(norms, oldNorms.length, norms.length, similarity.encodeNormValue(1.0f)); + Arrays.fill(norms, oldNorms.length, norms.length, (byte) 0); } else { - Arrays.fill(norms, 0, norms.length, similarity.encodeNormValue(1.0f)); + Arrays.fill(norms, 0, norms.length, (byte) 0); }
normsByFieldNameAndDocumentNumber.put(field, norms); fieldNames.remove(field); @@ -211,7 +211,7 @@ for (String field : fieldNames) { //System.out.println(field); byte[] norms = new byte[index.getDocumentsByNumber().length + termDocumentInformationFactoryByDocument.size()]; - Arrays.fill(norms, 0, norms.length, similarity.encodeNormValue(1.0f)); + Arrays.fill(norms, 0, norms.length, (byte) 0); normsByFieldNameAndDocumentNumber.put(field, norms); } fieldNames.clear(); Index: lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java =================================================================== --- lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (revision 1057282) +++ lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (working copy) @@ -1174,13 +1174,6 @@ } @Override - public void norms(String fieldName, byte[] bytes, int offset) { - if (DEBUG) System.err.println("MemoryIndexReader.norms*: " + fieldName); - byte[] norms = norms(fieldName); - System.arraycopy(norms, 0, bytes, offset, norms.length); - } - - @Override protected void doSetNorm(int doc, String fieldName, byte value) { throw new UnsupportedOperationException(); }