Index: src/test/org/apache/lucene/TestExternalCodecs.java =================================================================== --- src/test/org/apache/lucene/TestExternalCodecs.java (revision 900389) +++ src/test/org/apache/lucene/TestExternalCodecs.java (working copy) @@ -219,7 +219,7 @@ } @Override - public void addPosition(int position, byte[] payload, int payloadOffset, int payloadLength) { + public void add(int position, BytesRef payload) { if (payload != null) { throw new UnsupportedOperationException("can't handle payloads"); } @@ -413,7 +413,7 @@ } @Override - public byte[] getPayload(byte[] data, int offset) { + public BytesRef getPayload() { return null; } } Index: src/test/org/apache/lucene/search/TestPhraseQuery.java =================================================================== --- src/test/org/apache/lucene/search/TestPhraseQuery.java (revision 900389) +++ src/test/org/apache/lucene/search/TestPhraseQuery.java (working copy) @@ -566,5 +566,5 @@ q2.add(new PhraseQuery(), BooleanClause.Occur.MUST); q2.toString(); } - + } Index: src/test/org/apache/lucene/index/TestCodecs.java =================================================================== --- src/test/org/apache/lucene/index/TestCodecs.java (revision 900389) +++ src/test/org/apache/lucene/index/TestCodecs.java (working copy) @@ -130,9 +130,9 @@ class PositionData { int pos; - byte[] payload; + BytesRef payload; - PositionData(int pos, byte[] payload) { + PositionData(int pos, BytesRef payload) { this.pos = pos; this.payload = payload; } @@ -170,14 +170,12 @@ if (!field.omitTF) { for(int j=0;j getPayload() throws IOException { - byte [] bytes = new byte[positions.getPayloadLength()]; - bytes = positions.getPayload(bytes, 0); + final BytesRef payload = positions.getPayload(); + final byte[] bytes; + if (payload != null) { + bytes = new byte[payload.length]; + System.arraycopy(payload.bytes, payload.offset, bytes, 0, payload.length); + } else { + bytes = null; + } return 
Collections.singletonList(bytes); } Index: src/java/org/apache/lucene/search/MultiPhraseQuery.java =================================================================== --- src/java/org/apache/lucene/search/MultiPhraseQuery.java (revision 900389) +++ src/java/org/apache/lucene/search/MultiPhraseQuery.java (working copy) @@ -526,7 +526,7 @@ } @Override - public byte[] getPayload(byte[] data, int offset) { + public BytesRef getPayload() { throw new UnsupportedOperationException(); } Index: src/java/org/apache/lucene/search/PhrasePositions.java =================================================================== --- src/java/org/apache/lucene/search/PhrasePositions.java (revision 900389) +++ src/java/org/apache/lucene/search/PhrasePositions.java (working copy) @@ -44,10 +44,6 @@ return false; } positions = docs.positions(); - - // nocommit -- really needed? - //position = 0; - return true; } @@ -56,8 +52,6 @@ if (doc == docs.NO_MORE_DOCS) { return false; } - // nocommit -- really needed? - // position = 0; return true; } @@ -65,6 +59,9 @@ final void firstPosition() throws IOException { count = docs.freq(); // read first pos positions = docs.positions(); + if (positions == null) { + throw new IllegalStateException("no positions are stored for this field (Field.omitTermFreqAndPositions was used)"); + } nextPosition(); } Index: src/java/org/apache/lucene/search/ExactPhraseScorer.java =================================================================== --- src/java/org/apache/lucene/search/ExactPhraseScorer.java (revision 900389) +++ src/java/org/apache/lucene/search/ExactPhraseScorer.java (working copy) @@ -42,11 +42,11 @@ int freq = 0; do { // find position w/ all terms while (first.position < last.position) { // scan forward in first - do { - if (!first.nextPosition()) - return freq; - } while (first.position < last.position); - firstToLast(); + do { + if (!first.nextPosition()) + return freq; + } while (first.position < last.position); + firstToLast(); } freq++; // all 
equal: a match } while (last.nextPosition()); Index: src/java/org/apache/lucene/index/DocsEnum.java =================================================================== --- src/java/org/apache/lucene/index/DocsEnum.java (revision 900417) +++ src/java/org/apache/lucene/index/DocsEnum.java (working copy) @@ -65,12 +65,11 @@ return count; } - // nocommit -- maybe move this up to TermsEnum? that - // would disallow changing positions format/reader of each - // doc, though - // nocommit - doc whether this returns null if there are - // no positions, or a faker /** Don't call next() or skipTo() or read() until you're - * done consuming the positions */ + * done consuming the positions. NOTE: this method may + * return null, if the index contains no positional + * information for this document. The standard codec + * (default) does this today when the field was indexed + * with {@link Field#setOmitTermFreqAndPositions}. */ public abstract PositionsEnum positions() throws IOException; } Index: src/java/org/apache/lucene/index/LegacyFieldsEnum.java =================================================================== --- src/java/org/apache/lucene/index/LegacyFieldsEnum.java (revision 900389) +++ src/java/org/apache/lucene/index/LegacyFieldsEnum.java (working copy) @@ -259,9 +259,23 @@ return tp.getPayloadLength(); } + private BytesRef payload; + @Override - public byte[] getPayload(byte[] data, int offset) throws IOException { - return tp.getPayload(data, offset); + public BytesRef getPayload() throws IOException { + final int len = tp.getPayloadLength(); + if (payload == null) { + payload = new BytesRef(); + payload.bytes = new byte[len]; + } else { + if (payload.bytes.length < len) { + payload.grow(len); + } + } + + payload.bytes = tp.getPayload(payload.bytes, 0); + payload.length = len; + return payload; } @Override Index: src/java/org/apache/lucene/index/FreqProxTermsWriter.java =================================================================== --- 
src/java/org/apache/lucene/index/FreqProxTermsWriter.java (revision 900389) +++ src/java/org/apache/lucene/index/FreqProxTermsWriter.java (working copy) @@ -145,6 +145,7 @@ } private byte[] payloadBuffer; + BytesRef payload; /* Walk through all unique text tokens (Posting * instances) found in this field and serialize them @@ -244,19 +245,29 @@ //System.out.println(" pos=" + position); final int payloadLength; + final BytesRef thisPayload; + if ((code & 1) != 0) { // This position has a payload - payloadLength = prox.readVInt(); + payloadLength = prox.readVInt(); + + if (payload == null) { + payload = new BytesRef(); + payload.bytes = new byte[payloadLength]; + } else if (payload.bytes.length < payloadLength) { + payload.grow(payloadLength); + } - if (payloadBuffer == null || payloadBuffer.length < payloadLength) - payloadBuffer = new byte[payloadLength]; + prox.readBytes(payload.bytes, 0, payloadLength); + payload.length = payloadLength; + thisPayload = payload; - prox.readBytes(payloadBuffer, 0, payloadLength); - - } else + } else { payloadLength = 0; + thisPayload = null; + } - posConsumer.addPosition(position, payloadBuffer, 0, payloadLength); + posConsumer.add(position, thisPayload); } //End for posConsumer.finishDoc(); Index: src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- src/java/org/apache/lucene/index/SegmentReader.java (revision 900389) +++ src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -927,9 +927,10 @@ SegmentTermPositions stp = new SegmentTermPositions(pre.freqStream, pre.proxStream, pre.tis, core.fieldInfos); stp.setSkipDocs(deletedDocs); return stp; - } else + } else { // Emulate old API return new LegacyTermPositions(); + } } @Override @@ -1564,6 +1565,7 @@ final private class LegacyTermPositions extends LegacyTermDocs implements TermPositions { PositionsEnum positions; + boolean didGetPositions; LegacyTermPositions() throws IOException { super(); @@ 
-1572,14 +1574,18 @@ @Override public void seek(TermEnum termEnum) throws IOException { super.seek(termEnum); - if (docs != null) + if (docs != null) { positions = docs.positions(); + didGetPositions = true; + } else { + didGetPositions = false; + } } @Override public boolean skipTo(int target) throws IOException { boolean result = super.skipTo(target); - positions = null; + didGetPositions = false; return result; } @@ -1591,32 +1597,58 @@ @Override public void seek(Term term) throws IOException { super.seek(term); - positions = null; + didGetPositions = false; } @Override public boolean next() throws IOException { boolean result = super.next(); - positions = null; + didGetPositions = false; return result; } public int nextPosition() throws IOException { - if (positions == null) { + if (!didGetPositions) { positions = docs.positions(); + didGetPositions = true; } - return positions.next(); + + if (positions == null) { + // With omitTFAP, pre-flex API pretended there was + // one occurrence of the term, at position 0: + return 0; + } else { + return positions.next(); + } } public int getPayloadLength() { + if (positions == null) { + return 0; + } return positions.getPayloadLength(); } - public byte[] getPayload(byte[] data, int offset) throws IOException { - return positions.getPayload(data, offset); + public byte[] getPayload(byte[] bytes, int offset) throws IOException { + final BytesRef payload = positions.getPayload(); + // old API would always use the passed-in bytes if it + // "fits", else allocate new: + if (bytes != null && payload.length <= bytes.length - offset) { + System.arraycopy(payload.bytes, payload.offset, bytes, offset, payload.length); + return bytes; + } else if (payload.offset == 0 && payload.length == payload.bytes.length) { + return payload.bytes; + } else { + final byte[] retBytes = new byte[payload.length]; + System.arraycopy(payload.bytes, payload.offset, retBytes, 0, payload.length); + return retBytes; + } } public boolean
isPayloadAvailable() { + if (positions == null) { + return false; + } return positions.hasPayload(); } } Index: src/java/org/apache/lucene/index/PositionsEnum.java =================================================================== --- src/java/org/apache/lucene/index/PositionsEnum.java (revision 900417) +++ src/java/org/apache/lucene/index/PositionsEnum.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.BytesRef; public abstract class PositionsEnum { @@ -41,12 +42,12 @@ * the behavior is not defined. */ public abstract int next() throws IOException; + /** Returns length of payload at current position */ public abstract int getPayloadLength(); - // nocommit -- how to use BytesRef here? - // nocommit -- improve this so that readers that do their - // own buffering can save a copy - public abstract byte[] getPayload(byte[] data, int offset) throws IOException; + /** Returns the payload at this position, or null if no + * payload was indexed. 
*/ + public abstract BytesRef getPayload() throws IOException; public abstract boolean hasPayload(); } Index: src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsWriter.java =================================================================== --- src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsWriter.java (revision 900389) +++ src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsWriter.java (working copy) @@ -27,6 +27,7 @@ import org.apache.lucene.index.codecs.standard.StandardPositionsConsumer; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; // TODO: we now pulse entirely according to docFreq of the // term; it might be better to eg pulse by "net bytes used" @@ -96,18 +97,15 @@ boolean pulsed; // false if we've seen > maxPulsingDocFreq docs static class Position { - byte[] payload; + BytesRef payload; int pos; - int payloadLength; @Override public Object clone() { Position position = new Position(); position.pos = pos; - position.payloadLength = payloadLength; - if(payload != null) { - position.payload = new byte[payload.length]; - System.arraycopy(payload, 0, position.payload, 0, payloadLength); + if (payload != null) { + position.payload = new BytesRef(payload); } return position; } @@ -164,27 +162,33 @@ class PositionsWriter extends StandardPositionsConsumer { @Override public void start(IndexOutput termsOut) {} + @Override public void startTerm() {} + @Override - public void addPosition(int position, byte[] payload, int payloadOffset, int payloadLength) { + public void add(int position, BytesRef payload) { Position pos = currentDoc.positions[currentDoc.numPositions++]; pos.pos = position; - if (payload != null && payloadLength > 0) { - if (pos.payload == null || payloadLength > pos.payload.length) { - pos.payload = new byte[ArrayUtil.getNextSize(payloadLength)]; + if (payload != null && payload.length > 0) { + if (pos.payload == null) { + pos.payload = new 
BytesRef(payload); + } else { + pos.payload.copy(payload); } - System.arraycopy(payload, payloadOffset, pos.payload, 0, payloadLength); - pos.payloadLength = payloadLength; - } else - pos.payloadLength = 0; + } else if (pos.payload != null) { + pos.payload.length = 0; + } } + @Override public void finishDoc() { assert currentDoc.numPositions == currentDoc.termDocFreq; } + @Override public void finishTerm(boolean isIndexTerm) {} + @Override public void close() {} } @@ -219,11 +223,12 @@ assert doc.termDocFreq == doc.numPositions; for(int j=0;j 0) { + if (pos.payload != null && pos.payload.length > 0) { assert storePayloads; - posConsumer.addPosition(pos.pos, pos.payload, 0, pos.payloadLength); - } else - posConsumer.addPosition(pos.pos, null, 0, 0); + posConsumer.add(pos.pos, pos.payload); + } else { + posConsumer.add(pos.pos, null); + } } posConsumer.finishDoc(); } @@ -305,16 +310,21 @@ final int delta2 = pos.pos - lastPosition; lastPosition = pos.pos; if (storePayloads) { - if (pos.payloadLength != lastPayloadLength) { + final int payloadLength = pos.payload == null ? 
0 : pos.payload.length; + if (payloadLength != lastPayloadLength) { termsOut.writeVInt((delta2 << 1)|1); - termsOut.writeVInt(pos.payloadLength); - lastPayloadLength = pos.payloadLength; - } else + termsOut.writeVInt(payloadLength); + lastPayloadLength = payloadLength; + } else { termsOut.writeVInt(delta2 << 1); - if (pos.payloadLength > 0) - termsOut.writeBytes(pos.payload, 0, pos.payloadLength); - } else + } + + if (payloadLength > 0) { + termsOut.writeBytes(pos.payload.bytes, 0, pos.payload.length); + } + } else { termsOut.writeVInt(delta2); + } } } } Index: src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsReader.java =================================================================== --- src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsReader.java (revision 900389) +++ src/java/org/apache/lucene/index/codecs/pulsing/PulsingDocsReader.java (working copy) @@ -31,6 +31,7 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; /** Concrete class that reads the current doc/freq/skip * postings format */ @@ -151,19 +152,26 @@ final int code2 = termsIn.readVInt(); if (storePayloads) { position += code2 >>> 1; - if ((code2 & 1) != 0) + if ((code2 & 1) != 0) { payloadLength = termsIn.readVInt(); + } + if (payloadLength > 0) { - if (pos.payload == null || payloadLength > pos.payload.length) { - pos.payload = new byte[ArrayUtil.getNextSize(payloadLength)]; + if (pos.payload == null) { + pos.payload = new BytesRef(); + pos.payload.bytes = new byte[payloadLength]; + } else if (payloadLength > pos.payload.bytes.length) { + pos.payload.grow(payloadLength); } - termsIn.readBytes(pos.payload, 0, payloadLength); + pos.payload.length = payloadLength; + termsIn.readBytes(pos.payload.bytes, 0, payloadLength); + } else if (pos.payload != null) { + pos.payload.length = 0; } } else { position += code2; } pos.pos = position; - pos.payloadLength = 
payloadLength; } } doc.docID = docID; @@ -273,25 +281,17 @@ @Override public int getPayloadLength() { - return pos.payloadLength; + return pos.payload == null ? 0 : pos.payload.length; } @Override public boolean hasPayload() { - // nocommit -- maybe don't do the payloadRetrieved check? - return !payloadRetrieved && pos.payloadLength > 0; + return pos.payload != null && pos.payload.length > 0; } @Override - public byte[] getPayload(byte[] data, int offset) { - // nocommit -- inefficient - if (!payloadRetrieved) { - payloadRetrieved = true; - System.arraycopy(pos.payload, 0, data, offset, pos.payloadLength); - return data; - } else { - return null; - } + public BytesRef getPayload() { + return pos.payload; } } Index: src/java/org/apache/lucene/index/codecs/PositionsConsumer.java =================================================================== --- src/java/org/apache/lucene/index/codecs/PositionsConsumer.java (revision 900389) +++ src/java/org/apache/lucene/index/codecs/PositionsConsumer.java (working copy) @@ -20,20 +20,22 @@ import java.io.IOException; import org.apache.lucene.index.PositionsEnum; +import org.apache.lucene.util.BytesRef; public abstract class PositionsConsumer { - /** Add a new position & payload. If payloadLength > 0 - * you must read those bytes from the IndexInput. NOTE: - * you must fully consume the byte[] payload, since - * caller is free to reuse it on subsequent calls. */ - public abstract void addPosition(int position, byte[] payload, int payloadOffset, int payloadLength) throws IOException; + /** Add a new position & payload. A null payload means no + * payload; a non-null payload with zero length also + * means no payload. Caller may reuse the {@link + * BytesRef} for the payload between calls (method must + * fully consume the payload). 
*/ + public abstract void add(int position, BytesRef payload) throws IOException; /** Called when we are done adding positions & payloads * for each doc */ public abstract void finishDoc() throws IOException; - private byte[] payloadBuffer; + private BytesRef payload; /** Default merge impl, just copies positions & payloads * from the input. */ @@ -41,13 +43,14 @@ for(int i=0;i 0) { - if (payloadBuffer == null || payloadBuffer.length < payloadLength) { - payloadBuffer = new byte[payloadLength]; - } - positions.getPayload(payloadBuffer, 0); + payload = positions.getPayload(); + } else { + payload = null; } - addPosition(position, payloadBuffer, 0, payloadLength); + add(position, payload); } finishDoc(); } Index: src/java/org/apache/lucene/index/codecs/sep/SepPositionsReader.java =================================================================== --- src/java/org/apache/lucene/index/codecs/sep/SepPositionsReader.java (revision 900417) +++ src/java/org/apache/lucene/index/codecs/sep/SepPositionsReader.java (working copy) @@ -28,6 +28,7 @@ import org.apache.lucene.index.codecs.standard.StandardPositionsProducer; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.BytesRef; /** @lucene.experimental */ public class SepPositionsReader extends StandardPositionsProducer { @@ -281,32 +282,35 @@ return payloadLength; } + private BytesRef payload; + @Override - public byte[] getPayload(byte[] data, int offset) throws IOException { + public BytesRef getPayload() throws IOException { if (!payloadPending) { throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once."); } + if (payloadLength == 0) { + return null; + } + if (Codec.DEBUG) { System.out.println(" getPayload payloadFP=" + payloadIn.getFilePointer() + " len=" + payloadLength); } - final byte[] retArray; - final int retOffset; - if (data == null || data.length-offset < payloadLength) { - // the 
array is too small to store the payload data, - // so we allocate a new one - retArray = new byte[payloadLength]; - retOffset = 0; - } else { - retArray = data; - retOffset = offset; + if (payload == null) { + payload = new BytesRef(); + payload.bytes = new byte[payloadLength]; + } else if (payload.bytes.length < payloadLength) { + payload.grow(payloadLength); } - payloadIn.readBytes(retArray, retOffset, payloadLength); + payloadIn.readBytes(payload.bytes, 0, payloadLength); payloadPending = false; - return retArray; + payload.length = payloadLength; + + return payload; } @Override Index: src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java =================================================================== --- src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java (revision 900417) +++ src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java (working copy) @@ -60,4 +60,4 @@ // nocommit public abstract String descFilePointer() throws IOException; -} \ No newline at end of file +} Index: src/java/org/apache/lucene/index/codecs/sep/SepDocsReader.java =================================================================== --- src/java/org/apache/lucene/index/codecs/sep/SepDocsReader.java (revision 900417) +++ src/java/org/apache/lucene/index/codecs/sep/SepDocsReader.java (working copy) @@ -398,19 +398,12 @@ // Lazy init if (posReader == null) { - // nocommit -- should we return null? 
- // TermFreq was omitted from this field during // indexing, which means we pretend termFreq is // always 1 with that 1 occurrence having // position 0 - if (fakePositions == null) { - fakePositions = new FakePositionsEnum(); - } - if (Codec.DEBUG) { - System.out.println(" return fake"); - } - return fakePositions; + return null; + } else { // nocommit: abstraction violation @@ -609,24 +602,3 @@ } } } - -/** Returned when someone asks for positions() enum on field - * with omitTf true */ -class FakePositionsEnum extends PositionsEnum { - @Override - public int next() { - return 0; - } - @Override - public int getPayloadLength() { - return 0; - } - @Override - public boolean hasPayload() { - return false; - } - @Override - public byte[] getPayload(byte[] data, int offset) { - return null; - } -} Index: src/java/org/apache/lucene/index/codecs/sep/SepPositionsWriter.java =================================================================== --- src/java/org/apache/lucene/index/codecs/sep/SepPositionsWriter.java (revision 900417) +++ src/java/org/apache/lucene/index/codecs/sep/SepPositionsWriter.java (working copy) @@ -25,6 +25,7 @@ import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.codecs.PositionsConsumer; import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.util.BytesRef; /** @lucene.experimental */ public final class SepPositionsWriter extends PositionsConsumer { @@ -113,12 +114,12 @@ /** Add a new position & payload */ @Override - public void addPosition(int position, byte[] payload, int payloadOffset, int payloadLength) throws IOException { + public void add(int position, BytesRef payload) throws IOException { assert !omitTF: "omitTF is true"; assert posOut != null; if (Codec.DEBUG) { - if (payload != null) { - System.out.println("pw.addPos [" + desc + "]: pos=" + position + " posFP=" + posOut.descFilePointer() + " payloadFP=" + payloadOut.getFilePointer() + " payload=" + payloadLength + " bytes"); + if (payload != null 
&& payload.length > 0) { + System.out.println("pw.addPos [" + desc + "]: pos=" + position + " posFP=" + posOut.descFilePointer() + " payloadFP=" + payloadOut.getFilePointer() + " payload=" + payload.length + " bytes"); } else { System.out.println("pw.addPos [" + desc + "]: pos=" + position + " posFP=" + posOut.descFilePointer() + " payloadFP=" + payloadOut.getFilePointer()); } @@ -128,6 +129,7 @@ lastPosition = position; if (storePayloads) { + int payloadLength = payload == null ? 0 : payload.length; if (Codec.DEBUG) { System.out.println(" store payload len=" + payloadLength); } @@ -148,7 +150,7 @@ if (Codec.DEBUG) { System.out.println(" write @ payloadFP=" + payloadOut.getFilePointer()); } - payloadOut.writeBytes(payload, payloadLength); + payloadOut.writeBytes(payload.bytes, payload.offset, payloadLength); } } else { posOut.write(delta); Index: src/java/org/apache/lucene/index/codecs/standard/StandardPositionsWriter.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardPositionsWriter.java (revision 900389) +++ src/java/org/apache/lucene/index/codecs/standard/StandardPositionsWriter.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.BytesRef; final class StandardPositionsWriter extends StandardPositionsConsumer { final static String CODEC = "SingleFilePositionsPayloads"; @@ -79,13 +80,13 @@ /** Add a new position & payload */ @Override - public void addPosition(int position, byte[] payload, int payloadOffset, int payloadLength) throws IOException { + public void add(int position, BytesRef payload) throws IOException { assert !omitTermFreqAndPositions: "omitTermFreqAndPositions is true"; assert out != null; if (Codec.DEBUG) { if (payload != null) - System.out.println("pw.addPos [" + desc + "]: pos=" + position + " fp=" + 
out.getFilePointer() + " payload=" + payloadLength + " bytes"); + System.out.println("pw.addPos [" + desc + "]: pos=" + position + " fp=" + out.getFilePointer() + " payload=" + payload.length + " bytes"); else System.out.println("pw.addPos [" + desc + "]: pos=" + position + " fp=" + out.getFilePointer()); } @@ -100,6 +101,7 @@ if (Codec.DEBUG) { System.out.println(" store payloads"); } + final int payloadLength = payload == null ? 0 : payload.length; if (payloadLength != lastPayloadLength) { if (Codec.DEBUG) { @@ -109,12 +111,16 @@ lastPayloadLength = payloadLength; out.writeVInt((delta<<1)|1); out.writeVInt(payloadLength); - } else + } else { out.writeVInt(delta << 1); - if (payloadLength > 0) - out.writeBytes(payload, payloadLength); - } else + } + + if (payloadLength > 0) { + out.writeBytes(payload.bytes, payload.offset, payloadLength); + } + } else { out.writeVInt(delta); + } } void setField(FieldInfo fieldInfo) { Index: src/java/org/apache/lucene/index/codecs/standard/StandardPositionsReader.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardPositionsReader.java (revision 900389) +++ src/java/org/apache/lucene/index/codecs/standard/StandardPositionsReader.java (working copy) @@ -27,9 +27,8 @@ import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.BytesRef; -// nocommit -- base class should not be named terms dict: -// this class interacts w/ a docsreader public class StandardPositionsReader extends StandardPositionsProducer { IndexInput proxIn; @@ -120,8 +119,6 @@ return positions; } - // nocommit -- should we have different reader for - // payload vs no payload? 
class SegmentPositionsEnum extends PositionsEnum { // nocommit @@ -241,27 +238,24 @@ return payloadLength; } - @Override - public byte[] getPayload(byte[] data, int offset) throws IOException { + private BytesRef payload; - if (!payloadPending) + @Override + public BytesRef getPayload() throws IOException { + if (!payloadPending) { throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once."); - - final byte[] retArray; - final int retOffset; - if (data == null || data.length-offset < payloadLength) { - // the array is too small to store the payload data, - // so we allocate a new one - retArray = new byte[payloadLength]; - retOffset = 0; - } else { - retArray = data; - retOffset = offset; } - - proxIn.readBytes(retArray, retOffset, payloadLength); + if (payload == null) { + payload = new BytesRef(); + payload.bytes = new byte[payloadLength]; + } else if (payloadLength > payload.bytes.length) { + payload.grow(payloadLength); + } + proxIn.readBytes(payload.bytes, 0, payloadLength); + payload.length = payloadLength; payloadPending = false; - return retArray; + + return payload; } @Override Index: src/java/org/apache/lucene/index/codecs/standard/StandardDocsReader.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardDocsReader.java (revision 900389) +++ src/java/org/apache/lucene/index/codecs/standard/StandardDocsReader.java (working copy) @@ -86,8 +86,9 @@ skipInterval = termsIn.readInt(); maxSkipLevels = termsIn.readInt(); - if (posReader != null) + if (posReader != null) { posReader.start(termsIn); + } } @Override @@ -405,10 +406,7 @@ // indexing, which means we pretend termFreq is // always 1 with that 1 occurrence having // position 0 - if (fakePositions == null) { - fakePositions = new FormatPostingsFakePositionsEnum(); - } - return fakePositions; + return null; } else { // TODO: abstraction violation positions = 
(StandardPositionsReader.TermsDictReader.SegmentPositionsEnum) posReader.positions(); @@ -507,24 +505,3 @@ } } } - -/** Returned when someone asks for positions() enum on field - * with omitTf true */ -class FormatPostingsFakePositionsEnum extends PositionsEnum { - @Override - public int next() { - return 0; - } - @Override - public int getPayloadLength() { - return 0; - } - @Override - public boolean hasPayload() { - return false; - } - @Override - public byte[] getPayload(byte[] data, int offset) { - return null; - } -} Index: src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java (revision 900389) +++ src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java (working copy) @@ -29,7 +29,7 @@ import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.store.Directory; -/** Current index file format */ +/** Default codec. 
*/ public class StandardCodec extends Codec { public StandardCodec() { Index: src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java =================================================================== --- src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (revision 900389) +++ src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (working copy) @@ -439,9 +439,23 @@ return pos.isPayloadAvailable(); } + private BytesRef payload; + @Override - public byte[] getPayload(byte[] data, int offset) throws IOException { - return pos.getPayload(data, offset); + public BytesRef getPayload() throws IOException { + final int len = pos.getPayloadLength(); + if (payload == null) { + payload = new BytesRef(); + payload.bytes = new byte[len]; + } else { + if (payload.bytes.length < len) { + payload.grow(len); + } + } + + payload.bytes = pos.getPayload(payload.bytes, 0); + payload.length = len; + return payload; } } } Index: src/java/org/apache/lucene/util/BytesRef.java =================================================================== --- src/java/org/apache/lucene/util/BytesRef.java (revision 900417) +++ src/java/org/apache/lucene/util/BytesRef.java (working copy) @@ -32,6 +32,18 @@ public BytesRef() { } + public BytesRef(byte[] bytes, int offset, int length) { + this.bytes = bytes; + this.offset = offset; + this.length = length; + } + + public BytesRef(byte[] bytes) { + this.bytes = bytes; + this.offset = 0; + this.length = bytes.length; + } + /** * @param text Initialize the byte[] from the UTF8 bytes * for the provided Sring. This must be well-formed