Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt	(revision 1000581)
+++ lucene/CHANGES.txt	(working copy)
@@ -232,6 +232,10 @@
 * LUCENE-2648: PackedInts.Iterator now supports to advance by more than a
   single ordinal.  (Simon Willnauer)
 
+* LUCENE-2664: Add SimpleText codec, which stores all terms/postings
+  data in a single text file for transparency (at the expense of poor
+  performance).  (Sahin Buyrukbilen via Mike McCandless)
+
 Optimizations
 
 * LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.
Index: lucene/src/test/org/apache/lucene/TestDemo.java
===================================================================
--- lucene/src/test/org/apache/lucene/TestDemo.java	(revision 1000007)
+++ lucene/src/test/org/apache/lucene/TestDemo.java	(working copy)
@@ -23,15 +23,14 @@
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 
@@ -51,9 +50,7 @@
     Directory directory = newDirectory();
     // To store an index on disk, use this instead:
     //Directory directory = FSDirectory.open("/tmp/testindex");
-    IndexWriter iwriter = new IndexWriter(directory, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, analyzer).setMaxFieldLength(25000));
-
+    RandomIndexWriter iwriter = new RandomIndexWriter(random, directory);
     Document doc = new Document();
     String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
     String text = "This is the text to be indexed. 
" + longTerm; @@ -69,16 +66,19 @@ // Parse a simple query that searches for "text": QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "fieldname", analyzer); Query query = parser.parse("text"); - ScoreDoc[] hits = isearcher.search(query, null, 1).scoreDocs; - assertEquals(1, hits.length); + TopDocs hits = isearcher.search(query, null, 1); + assertEquals(1, hits.totalHits); // Iterate through the results: - for (int i = 0; i < hits.length; i++) { - Document hitDoc = isearcher.doc(hits[i].doc); + for (int i = 0; i < hits.scoreDocs.length; i++) { + Document hitDoc = isearcher.doc(hits.scoreDocs[i].doc); assertEquals(text, hitDoc.get("fieldname")); } + + // Test simple phrase query + query = parser.parse("\"to be\""); + assertEquals(1, isearcher.search(query, null, 1).totalHits); + isearcher.close(); directory.close(); - } - } Index: lucene/src/test/org/apache/lucene/index/TestPayloads.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestPayloads.java (revision 1000007) +++ lucene/src/test/org/apache/lucene/index/TestPayloads.java (working copy) @@ -25,7 +25,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Random; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockTokenizer; @@ -157,19 +156,19 @@ public void testPayloadsEncoding() throws Exception { // first perform the test using a RAMDirectory Directory dir = newDirectory(); - performTest(random, dir); + performTest(dir); dir.close(); // now use a FSDirectory and repeat same test File dirName = _TestUtil.getTempDir("test_payloads"); dir = FSDirectory.open(dirName); - performTest(random, dir); + performTest(dir); _TestUtil.rmDir(dirName); dir.close(); } // builds an index with payloads in the given Directory and performs // different tests to verify the payload encoding - private void performTest(Random random, Directory dir) throws Exception { + private void performTest(Directory dir) throws Exception { PayloadAnalyzer analyzer = new PayloadAnalyzer(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer) @@ -247,8 +246,10 @@ for (int j = 0; j < numTerms; j++) { tps[j].nextPosition(); BytesRef br = tps[j].getPayload(); - System.arraycopy(br.bytes, br.offset, verifyPayloadData, offset, br.length); - offset += br.length; + if (br != null) { + System.arraycopy(br.bytes, br.offset, verifyPayloadData, offset, br.length); + offset += br.length; + } } } } Index: lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java (revision 1000007) +++ lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java (working copy) @@ -22,6 +22,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.document.Field; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.PhraseQuery; @@ -64,7 +65,7 @@ } - private void createIndex(Random random, int numHits) throws IOException { + private void createIndex(int numHits) throws IOException { int numDocs = 500; Directory directory = new SeekCountingDirectory(new RAMDirectory()); @@ -106,8 +107,8 @@ return this.searcher.search(pq, null, 1000).scoreDocs; } - private void performTest(Random random, int numHits) throws IOException { - 
createIndex(random, numHits); + private void performTest(int numHits) throws IOException { + createIndex(numHits); this.seeksCounter = 0; ScoreDoc[] hits = search(); // verify that the right number of docs was found @@ -119,9 +120,12 @@ } public void testLazySkipping() throws IOException { - // test whether only the minimum amount of seeks() are performed - performTest(random, 5); - performTest(random, 10); + // test whether only the minimum amount of seeks() + // are performed + if (!CodecProvider.getDefaultCodec().equals("SimpleText")) { + performTest(5); + performTest(10); + } } public void testSeek() throws IOException { Index: lucene/src/test/org/apache/lucene/util/LuceneTestCase.java =================================================================== --- lucene/src/test/org/apache/lucene/util/LuceneTestCase.java (revision 1000007) +++ lucene/src/test/org/apache/lucene/util/LuceneTestCase.java (working copy) @@ -35,11 +35,11 @@ import org.apache.lucene.index.codecs.preflex.PreFlexCodec; import org.apache.lucene.index.codecs.preflexrw.PreFlexRWCodec; import org.apache.lucene.index.codecs.pulsing.PulsingCodec; +import org.apache.lucene.index.codecs.simpletext.SimpleTextCodec; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.FieldCache.CacheEntry; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.MMapDirectory; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.util.FieldCacheSanityChecker.Insanity; import org.junit.After; @@ -192,7 +192,7 @@ private static Map stores; - private static final String[] TEST_CODECS = new String[] {"MockSep", "MockFixedIntBlock", "MockVariableIntBlock"}; + private static final String[] TEST_CODECS = new String[] {"MockSep", "MockFixedIntBlock", "MockVariableIntBlock", "SimpleText"}; private static void swapCodec(Codec c) { final CodecProvider cp = CodecProvider.getDefault(); @@ -245,6 +245,7 @@ swapCodec(new MockFixedIntBlockCodec(codecHasParam && "MockFixedIntBlock".equals(codec) ? codecParam : _TestUtil.nextInt(seedRnd, 1, 2000))); // baseBlockSize cannot be over 127: swapCodec(new MockVariableIntBlockCodec(codecHasParam && "MockVariableIntBlock".equals(codec) ? codecParam : _TestUtil.nextInt(seedRnd, 1, 127))); + swapCodec(new SimpleTextCodec()); return cp.lookup(codec); } Index: lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java (revision 0) @@ -0,0 +1,153 @@ +package org.apache.lucene.index.codecs.simpletext; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.UnicodeUtil; +import org.apache.lucene.index.codecs.FieldsConsumer; +import org.apache.lucene.index.codecs.TermsConsumer; +import org.apache.lucene.index.codecs.PostingsConsumer; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexOutput; + +import java.io.IOException; +import java.util.Comparator; + +// TODO +// - hmm if term has 0 docs don't make an entry.... + +class SimpleTextFieldsWriter extends FieldsConsumer { + + private final IndexOutput out; + private final BytesRef scratch = new BytesRef(10); + final static byte NEWLINE = 10; + final static byte ESCAPE = 92; + + final static BytesRef END = new BytesRef("END"); + final static BytesRef FIELD = new BytesRef("field "); + final static BytesRef TERM = new BytesRef(" term "); + final static BytesRef DOC = new BytesRef(" doc "); + final static BytesRef POS = new BytesRef(" pos "); + final static BytesRef PAYLOAD = new BytesRef(" payload "); + + public SimpleTextFieldsWriter(SegmentWriteState state) throws IOException { + final String fileName = SimpleTextCodec.getPostingsFileName(state.segmentName); + out = state.directory.createOutput(fileName); + state.flushedFiles.add(fileName); + } + + private void write(String s) throws IOException { + UnicodeUtil.UTF16toUTF8(s, 0, s.length(), scratch); + write(scratch); + } + + private void write(BytesRef b) throws IOException { + for(int i=0;i getComparator() { + return BytesRef.getUTF8SortedAsUnicodeComparator(); + } + } + + private class SimpleTextPostingsWriter extends PostingsConsumer { + private BytesRef term; + private boolean wroteTerm; + + @Override + public void startDoc(int docID, int termDocFreq) throws IOException { + if (!wroteTerm) { + // we lazily do this, in case the term had zero docs + write(TERM); + write(term); + newline(); + wroteTerm = true; + } + + write(DOC); + write(Integer.toString(docID)); + newline(); + } + + public PostingsConsumer reset(BytesRef term) { + this.term = term; + wroteTerm = false; + return this; + } + + @Override + public void addPosition(int position, BytesRef payload) throws IOException { + write(POS); + write(Integer.toString(position)); + newline(); + if (payload != null && payload.length > 0) { + assert payload.length != 0; + write(PAYLOAD); + write(payload); + newline(); + } + } + + @Override + public void finishDoc() { + } + } + + @Override + public void close() throws IOException { + write(END); + newline(); + out.close(); + } +} \ No newline at end of file Index: lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (revision 0) @@ -0,0 +1,496 @@ +package org.apache.lucene.index.codecs.simpletext; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.codecs.FieldsProducer; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.FieldsEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.StringHelper; + +import java.io.IOException; +import java.util.Comparator; + +class SimpleTextFieldsReader extends FieldsProducer { + + private final IndexInput in; + private final FieldInfos fieldInfos; + + final static byte NEWLINE = SimpleTextFieldsWriter.NEWLINE; + final static byte ESCAPE = SimpleTextFieldsWriter.ESCAPE; + final static BytesRef END = SimpleTextFieldsWriter.END; + final static BytesRef FIELD = SimpleTextFieldsWriter.FIELD; + final static BytesRef TERM = SimpleTextFieldsWriter.TERM; + final static BytesRef DOC = SimpleTextFieldsWriter.DOC; + final static BytesRef POS = SimpleTextFieldsWriter.POS; + final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD; + + public SimpleTextFieldsReader(SegmentReadState state) throws IOException { + in = state.dir.openInput(SimpleTextCodec.getPostingsFileName(state.segmentInfo.name)); + fieldInfos = state.fieldInfos; + } + + static void readLine(IndexInput in, BytesRef scratch) throws IOException { + int upto = 0; + while(true) { + byte b = in.readByte(); + if (scratch.bytes.length == upto) { + scratch.grow(1+upto); + } + if (b == ESCAPE) { + scratch.bytes[upto++] = in.readByte(); + } else { + if (b == NEWLINE) { + break; + } else { + scratch.bytes[upto++] = b; + } + } + } + scratch.offset = 0; + scratch.length = upto; + } + + private class SimpleTextFieldsEnum extends FieldsEnum { + private final IndexInput in; + private final BytesRef scratch = new BytesRef(10); + private boolean omitTF; + + public SimpleTextFieldsEnum() { + this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone(); + } + + @Override + public String next() throws IOException { + while(true) { + readLine(in, scratch); + if (scratch.equals(END)) { + return null; + } + if (scratch.startsWith(FIELD)) { + String field = StringHelper.intern(new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8")); + omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions; + return field; + } + } + } + + @Override + public TermsEnum terms() throws IOException { + return new SimpleTextTermsEnum(in.getFilePointer(), omitTF); + } + } + + private class SimpleTextTermsEnum extends TermsEnum { + private final IndexInput in; + private final boolean omitTF; + private BytesRef current; + private final long fieldStart; + private final BytesRef scratch = new BytesRef(10); + private final BytesRef scratch2 = new BytesRef(10); + private int docFreq; + private long docsStart; + private boolean ended; + + public SimpleTextTermsEnum(long offset, boolean omitTF) throws IOException { + this.in = (IndexInput) 
SimpleTextFieldsReader.this.in.clone(); + this.in.seek(offset); + this.omitTF = omitTF; + fieldStart = offset; + } + + public SeekStatus seek(BytesRef text, boolean useCache /* ignored */) throws IOException { + if (current != null) { + final int cmp = current.compareTo(text); + if (cmp == 0) { + return SeekStatus.FOUND; + } else if (cmp > 0) { + ended = false; + in.seek(fieldStart); + } + } else { + ended = false; + in.seek(fieldStart); + } + + // Naive!! This just scans... would be better to do + // up-front scan to build in-RAM index + BytesRef b; + while((b = next()) != null) { + final int cmp = b.compareTo(text); + if (cmp == 0) { + ended = false; + return SeekStatus.FOUND; + } else if (cmp > 0) { + ended = false; + return SeekStatus.NOT_FOUND; + } + } + current = null; + ended = true; + return SeekStatus.END; + } + + @Override + public BytesRef next() throws IOException { + assert !ended; + readLine(in, scratch); + if (scratch.equals(END) || scratch.startsWith(FIELD)) { + ended = true; + current = null; + return null; + } else { + assert scratch.startsWith(TERM): "got " + scratch.utf8ToString(); + docsStart = in.getFilePointer(); + final int len = scratch.length - TERM.length; + if (len > scratch2.length) { + scratch2.grow(len); + } + System.arraycopy(scratch.bytes, TERM.length, scratch2.bytes, 0, len); + scratch2.length = len; + current = scratch2; + docFreq = 0; + long lineStart = 0; + while(true) { + lineStart = in.getFilePointer(); + readLine(in, scratch); + if (scratch.equals(END) || scratch.startsWith(FIELD) || scratch.startsWith(TERM)) { + break; + } + if (scratch.startsWith(DOC)) { + docFreq++; + } + } + in.seek(lineStart); + return current; + } + } + + @Override + public BytesRef term() { + return current; + } + + @Override + public long ord() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public SeekStatus seek(long ord) { + throw new UnsupportedOperationException(); + } + + @Override + public int docFreq() { + return docFreq; + } + + @Override + public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { + SimpleTextDocsEnum docsEnum; + if (reuse != null && reuse instanceof SimpleTextDocsEnum && ((SimpleTextDocsEnum) reuse).canReuse(in)) { + docsEnum = (SimpleTextDocsEnum) reuse; + } else { + docsEnum = new SimpleTextDocsEnum(); + } + return docsEnum.reset(docsStart, skipDocs, omitTF); + } + + @Override + public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { + if (omitTF) { + return null; + } + + SimpleTextDocsAndPositionsEnum docsAndPositionsEnum; + if (reuse != null && reuse instanceof SimpleTextDocsAndPositionsEnum && ((SimpleTextDocsAndPositionsEnum) reuse).canReuse(in)) { + docsAndPositionsEnum = (SimpleTextDocsAndPositionsEnum) reuse; + } else { + docsAndPositionsEnum = new SimpleTextDocsAndPositionsEnum(); + } + // nocommit + docsAndPositionsEnum.term = current; + return docsAndPositionsEnum.reset(docsStart, skipDocs); + } + + @Override + public Comparator getComparator() { + return BytesRef.getUTF8SortedAsUnicodeComparator(); + } + } + + private class SimpleTextDocsEnum extends DocsEnum { + private final IndexInput inStart; + private final IndexInput in; + private boolean omitTF; + private int docID; + private int tf; + private Bits skipDocs; + private final BytesRef scratch = new BytesRef(10); + + public SimpleTextDocsEnum() { + this.inStart = SimpleTextFieldsReader.this.in; + this.in = (IndexInput) this.inStart.clone(); + } + + public boolean 
canReuse(IndexInput in) { + return in == inStart; + } + + public SimpleTextDocsEnum reset(long fp, Bits skipDocs, boolean omitTF) throws IOException { + this.skipDocs = skipDocs; + in.seek(fp); + this.omitTF = omitTF; + if (omitTF) { + tf = 1; + } + return this; + } + + @Override + public int docID() { + return docID; + } + + @Override + public int freq() { + return tf; + } + + @Override + public int nextDoc() throws IOException { + if (docID == NO_MORE_DOCS) { + return docID; + } + boolean first = true; + int termFreq = 0; + while(true) { + final long lineStart = in.getFilePointer(); + readLine(in, scratch); + if (scratch.startsWith(DOC)) { + if (!first && (skipDocs == null || !skipDocs.get(docID))) { + in.seek(lineStart); + if (!omitTF) { + tf = termFreq; + } + return docID; + } + docID = Integer.parseInt(new String(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length)); + termFreq = 0; + first = false; + } else if (scratch.startsWith(POS)) { + termFreq++; + } else if (scratch.startsWith(PAYLOAD)) { + // skip + } else { + assert scratch.startsWith(TERM) || scratch.startsWith(FIELD) || scratch.startsWith(END): "scratch=" + scratch.utf8ToString(); + if (!first && (skipDocs == null || !skipDocs.get(docID))) { + in.seek(lineStart); + if (!omitTF) { + tf = termFreq; + } + return docID; + } + return docID = NO_MORE_DOCS; + } + } + } + + @Override + public int advance(int target) throws IOException { + // Naive -- better to index skip data + while(nextDoc() < target); + return docID; + } + } + + private class SimpleTextDocsAndPositionsEnum extends DocsAndPositionsEnum { + private final IndexInput inStart; + private final IndexInput in; + private int docID; + private int tf; + private Bits skipDocs; + private final BytesRef scratch = new BytesRef(10); + private final BytesRef scratch2 = new BytesRef(10); + private BytesRef payload; + private long nextDocStart; + + // nocommit + public BytesRef term; + + public SimpleTextDocsAndPositionsEnum() { + this.inStart = SimpleTextFieldsReader.this.in; + this.in = (IndexInput) inStart.clone(); + } + + public boolean canReuse(IndexInput in) { + return in == inStart; + } + + public SimpleTextDocsAndPositionsEnum reset(long fp, Bits skipDocs) { + this.skipDocs = skipDocs; + nextDocStart = fp; + return this; + } + + @Override + public int docID() { + return docID; + } + + @Override + public int freq() { + return tf; + } + + @Override + public int nextDoc() throws IOException { + boolean first = true; + in.seek(nextDocStart); + long posStart = 0; + while(true) { + final long lineStart = in.getFilePointer(); + readLine(in, scratch); + if (scratch.startsWith(DOC)) { + if (!first && (skipDocs == null || !skipDocs.get(docID))) { + nextDocStart = lineStart; + in.seek(posStart); + return docID; + } + docID = Integer.parseInt(new String(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length)); + tf = 0; + posStart = in.getFilePointer(); + first = false; + } else if (scratch.startsWith(POS)) { + tf++; + } else if (scratch.startsWith(PAYLOAD)) { + // skip + } else { + assert scratch.startsWith(TERM) || scratch.startsWith(FIELD) || scratch.startsWith(END); + if (!first && (skipDocs == null || !skipDocs.get(docID))) { + nextDocStart = lineStart; + in.seek(posStart); + return docID; + } + return docID = NO_MORE_DOCS; + } + } + } + + @Override + public int advance(int target) throws IOException { + // Naive -- better to index skip data + while(nextDoc() < target); + return docID; + } + + @Override + public int nextPosition() throws IOException { 
+      readLine(in, scratch);
+      assert scratch.startsWith(POS): "got line=" + scratch.utf8ToString();
+      final int pos = Integer.parseInt(new String(scratch.bytes, scratch.offset+POS.length, scratch.length-POS.length));
+      final long fp = in.getFilePointer();
+      readLine(in, scratch);
+      if (scratch.startsWith(PAYLOAD)) {
+        final int len = scratch.length - PAYLOAD.length;
+        if (scratch2.bytes.length < len) {
+          scratch2.grow(len);
+        }
+        System.arraycopy(scratch.bytes, PAYLOAD.length, scratch2.bytes, 0, len);
+        scratch2.length = len;
+        payload = scratch2;
+      } else {
+        payload = null;
+        in.seek(fp);
+      }
+      return pos;
+    }
+
+    @Override
+    public BytesRef getPayload() {
+      // Some tests rely on only being able to retrieve the
+      // payload once
+      try {
+        return payload;
+      } finally {
+        payload = null;
+      }
+    }
+
+    @Override
+    public boolean hasPayload() {
+      return payload != null;
+    }
+  }
+
+  private class SimpleTextTerms extends Terms {
+    private final String field;
+    private final long termsStart;
+    private final boolean omitTF;
+
+    public SimpleTextTerms(String field, long termsStart) {
+      this.field = StringHelper.intern(field);
+      this.termsStart = termsStart;
+      omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions;
+    }
+
+    @Override
+    public TermsEnum iterator() throws IOException {
+      return new SimpleTextTermsEnum(termsStart, omitTF);
+    }
+
+    @Override
+    public Comparator<BytesRef> getComparator() {
+      return BytesRef.getUTF8SortedAsUnicodeComparator();
+    }
+  }
+
+  @Override
+  public FieldsEnum iterator() throws IOException {
+    return new SimpleTextFieldsEnum();
+  }
+
+  @Override
+  public Terms terms(String field) throws IOException {
+    SimpleTextFieldsEnum fe = (SimpleTextFieldsEnum) iterator();
+    String fieldUpto;
+    while((fieldUpto = fe.next()) != null) {
+      if (fieldUpto.equals(field)) {
+        return new SimpleTextTerms(field, fe.in.getFilePointer());
+      }
+    }
+    return null;
+  }
+
+  @Override
+  public void loadTermsIndex(int indexDivisor) {
+  }
+
+  @Override
+  public void close() throws IOException {
+    in.close();
+  }
+}
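For orientation (illustrative, not from the patch): a postings file written by SimpleTextFieldsWriter and parsed back by SimpleTextFieldsReader.readLine() is a plain sequence of prefixed lines, one record per line, ending with an END marker. The field/term/doc/pos/payload values below are made up, and the exact indentation is whatever the FIELD/TERM/DOC/POS/PAYLOAD prefix constants in the writer define; only the shape matters:

    field body
      term hello
        doc 0
          pos 3
        doc 2
          pos 1
          payload _payload_bytes_
      term world
        doc 0
          pos 4
    END

A newline byte (10) or backslash (92) occurring inside a term or payload is escaped with a backslash, which is why readLine() treats the byte after ESCAPE as literal and only an unescaped NEWLINE as end-of-record. Nothing redundant is stored: docFreq is recovered by counting the doc lines under a term, and the per-document freq by counting the pos lines under a doc.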
Index: lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java	(revision 0)
+++ lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java	(revision 0)
@@ -0,0 +1,72 @@
+package org.apache.lucene.index.codecs.simpletext;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.store.Directory;
+
+/** For debugging, curiosity, transparency only!!  Do not
+ *  use this codec in production.
+ *
+ *  <p>This codec stores all postings data in a single
+ *  human-readable text file (_N.pst).  You can view this in
+ *  any text editor, and even edit it to alter your index.
+ *
+ *  @lucene.experimental */
+public class SimpleTextCodec extends Codec {
+
+  public SimpleTextCodec() {
+    name = "SimpleText";
+  }
+
+  @Override
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    return new SimpleTextFieldsWriter(state);
+  }
+
+  @Override
+  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+    return new SimpleTextFieldsReader(state);
+  }
+
+  /** Extension of freq postings file */
+  static final String POSTINGS_EXTENSION = "pst";
+
+  static String getPostingsFileName(String segment) {
+    return IndexFileNames.segmentFileName(segment, "", POSTINGS_EXTENSION);
+  }
+
+  @Override
+  public void files(Directory dir, SegmentInfo segmentInfo, Set<String> files) throws IOException {
+    files.add(getPostingsFileName(segmentInfo.name));
+  }
+
+  @Override
+  public void getExtensions(Set<String> extensions) {
+    extensions.add(POSTINGS_EXTENSION);
+  }
+}
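A usage sketch, not part of the patch: CodecProvider.getDefault() and getDefaultCodec() appear elsewhere in this patch, but register(Codec), a static setDefaultCodec(String), and the dir/analyzer variables below are assumptions; adjust to whatever the trunk CodecProvider actually exposes.

    // Assumed setup: Directory dir and Analyzer analyzer already exist.
    CodecProvider cp = CodecProvider.getDefault();
    cp.register(new SimpleTextCodec());           // make "SimpleText" resolvable by name
    CodecProvider.setDefaultCodec("SimpleText");  // assumed static setter mirroring getDefaultCodec()

    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
    // each segment flushed from here on writes a human-readable _N.pst postings file

For test coverage, the LuceneTestCase change above registers the codec and adds "SimpleText" to TEST_CODECS, so forcing the suite onto it (e.g. via the tests.codec system property, if that is still the knob that feeds the codec string) should exercise it broadly.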
Index: lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java	(revision 1000007)
+++ lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java	(working copy)
@@ -581,13 +581,6 @@
       return position;
     }
 
-    /** Returns length of payload at current position */
-    public int getPayloadLength() {
-      assert lazyProxPointer == -1;
-      assert posPendingCount < freq;
-      return payloadLength;
-    }
-
     /** Returns the payload at this position, or null if no
      *  payload was indexed. */
     public BytesRef getPayload() throws IOException {
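One more illustration, not part of the patch: SimpleTextFieldsReader.readLine() treats a byte following ESCAPE (92) as literal and an unescaped NEWLINE (10) as end-of-record, so the writer must escape those two byte values inside terms and payloads. The standalone toy below (plain JDK, hypothetical class and method names) round-trips a payload containing both bytes to show that the rule is lossless:

    import java.io.ByteArrayOutputStream;

    public class SimpleTextEscapeDemo {
      static final byte NEWLINE = 10, ESCAPE = 92;

      // Mirrors the writer side: escape newline/backslash, terminate with a bare newline.
      static void writeEscaped(ByteArrayOutputStream out, byte[] b) {
        for (byte x : b) {
          if (x == NEWLINE || x == ESCAPE) {
            out.write(ESCAPE);              // escape so the byte cannot end the record
          }
          out.write(x);
        }
        out.write(NEWLINE);                 // unescaped newline terminates the record
      }

      // Mirrors readLine(): a byte after ESCAPE is literal, a bare NEWLINE ends the record.
      static byte[] readLine(byte[] data, int[] pos) {
        ByteArrayOutputStream line = new ByteArrayOutputStream();
        while (true) {
          byte x = data[pos[0]++];
          if (x == ESCAPE) {
            line.write(data[pos[0]++]);     // take the next byte literally
          } else if (x == NEWLINE) {
            return line.toByteArray();      // end of record
          } else {
            line.write(x);
          }
        }
      }

      public static void main(String[] args) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        writeEscaped(out, new byte[] {'a', NEWLINE, 'b', ESCAPE, 'c'});
        byte[] back = readLine(out.toByteArray(), new int[] {0});
        // back is {'a', 10, 'b', 92, 'c'} again -- the payload survives the text format
        System.out.println(back.length == 5);
      }
    }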