Index: lucene/CHANGES.txt --- lucene/CHANGES.txt Mon Jun 20 10:31:20 2011 -0400 +++ lucene/CHANGES.txt Mon Jun 20 10:57:22 2011 -0400 @@ -149,6 +149,9 @@ files holding stored fields and term vectors) while flushing a segment. (Mike McCandless) +* LUCENE-2548: Field names (eg in Term, FieldInfo) are no longer + interned. (Mike McCandless) + Changes in Runtime Behavior * LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java --- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java Mon Jun 20 10:57:22 2011 -0400 @@ -31,7 +31,6 @@ import org.apache.lucene.index.memory.MemoryIndex; import org.apache.lucene.search.Query; import org.apache.lucene.search.spans.SpanQuery; -import org.apache.lucene.util.StringHelper; /** * {@link Scorer} implementation which scores text fragments by the number of @@ -88,7 +87,7 @@ * @param defaultField */ public QueryScorer(Query query, IndexReader reader, String field, String defaultField) { - this.defaultField = StringHelper.intern(defaultField); + this.defaultField = defaultField; init(query, field, reader, true); } @@ -96,7 +95,7 @@ * @param defaultField - The default field for queries with the field name unspecified */ public QueryScorer(Query query, String field, String defaultField) { - this.defaultField = StringHelper.intern(defaultField); + this.defaultField = defaultField; init(query, field, null, true); } Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java --- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java Mon Jun 20 10:57:22 2011 -0400 
@@ -26,7 +26,6 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.util.StringHelper; /** * Utility class used to extract the terms used in a query, plus any weights. @@ -94,10 +93,6 @@ public static final WeightedTerm[] getTerms(Query query, boolean prohibited, String fieldName) { HashSet terms=new HashSet(); - if(fieldName!=null) - { - fieldName= StringHelper.intern(fieldName); - } getTerms(query,terms,prohibited,fieldName); return terms.toArray(new WeightedTerm[0]); } @@ -114,7 +109,6 @@ return getTerms(query,prohibited,null); } - //fieldname MUST be interned prior to this call private static final void getTerms(Query query, HashSet terms,boolean prohibited, String fieldName) { try @@ -131,7 +125,7 @@ for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();) { Term term = iter.next(); - if((fieldName==null)||(term.field()==fieldName)) + if((fieldName==null)||(term.field().equals(fieldName))) { terms.add(new WeightedTerm(query.getBoost(),term.text())); } Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java --- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Mon Jun 20 10:57:22 2011 -0400 @@ -41,7 +41,6 @@ import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.Spans; -import org.apache.lucene.util.StringHelper; /** * Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether @@ -63,7 +62,7 @@ public WeightedSpanTermExtractor(String defaultField) { if (defaultField != null) { - this.defaultField = StringHelper.intern(defaultField); + this.defaultField = defaultField; } } @@ -314,8 +313,8 @@ * 
Necessary to implement matches for queries against defaultField */ private boolean fieldNameComparator(String fieldNameToCheck) { - boolean rv = fieldName == null || fieldNameToCheck == fieldName - || fieldNameToCheck == defaultField; + boolean rv = fieldName == null || fieldName.equals(fieldNameToCheck) + || (defaultField != null && defaultField.equals(fieldNameToCheck)); return rv; } @@ -372,7 +371,7 @@ public Map getWeightedSpanTerms(Query query, TokenStream tokenStream, String fieldName) throws IOException { if (fieldName != null) { - this.fieldName = StringHelper.intern(fieldName); + this.fieldName = fieldName; } else { this.fieldName = null; } @@ -408,7 +407,7 @@ public Map getWeightedSpanTermsWithScores(Query query, TokenStream tokenStream, String fieldName, IndexReader reader) throws IOException { if (fieldName != null) { - this.fieldName = StringHelper.intern(fieldName); + this.fieldName = fieldName; } else { this.fieldName = null; } Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java --- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java Mon Jun 20 10:57:22 2011 -0400 @@ -400,7 +400,7 @@ if (i < 0) { i = -i - 1; } - if (i >= orderedTerms.length || orderedTerms[i].field() != field) { + if (i >= orderedTerms.length || !orderedTerms[i].field().equals(field)) { // field does not exist return null; } Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java --- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java Mon Jun 20 10:57:22 2011 -0400 @@ -44,7 +44,6 @@ import 
org.apache.lucene.index.TermVectorOffsetInfo; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.SimilarityProvider; -import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.CollectionUtil; import org.apache.lucene.util.AttributeImpl; @@ -475,7 +474,7 @@ FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name()); if (fieldSetting == null) { fieldSetting = new FieldSetting(); - fieldSetting.fieldName = StringHelper.intern(field.name()); + fieldSetting.fieldName = field.name(); fieldSettingsByFieldName.put(fieldSetting.fieldName, fieldSetting); fieldNameBuffer.add(fieldSetting.fieldName); } Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java --- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java Mon Jun 20 10:57:22 2011 -0400 @@ -69,11 +69,9 @@ return SeekStatus.END; } - if (terms[upto].field() == field) { + if (terms[upto].field().equals(field)) { return SeekStatus.FOUND; } else { - // make sure field was interned - assert !terms[upto].field().equals(field); return SeekStatus.END; } } @@ -84,12 +82,10 @@ if (upto >= terms.length) { return null; } - if (terms[upto].field() == field) { + if (terms[upto].field().equals(field)) { br.copy(terms[upto].getTerm().text()); return br; } else { - // make sure field was interned - assert !terms[upto].field().equals(field); return null; } } Index: lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java --- lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java Mon Jun 20 10:57:22 2011 -0400 @@ -27,7 +27,6 @@ import 
org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ReaderUtil; @@ -111,7 +110,6 @@ * @param field the field whose norms should be reset */ public void reSetNorms(String field) throws IOException { - String fieldName = StringHelper.intern(field); Similarity fieldSim = sim.get(field); IndexReader reader = null; try { @@ -149,7 +147,7 @@ for (int d = 0; d < termCounts.length; d++) { if (delDocs == null || !delDocs.get(d)) { invertState.setLength(termCounts[d]); - subReader.setNorm(d, fieldName, fieldSim.encodeNormValue(fieldSim.computeNorm(invertState))); + subReader.setNorm(d, field, fieldSim.encodeNormValue(fieldSim.computeNorm(invertState))); } } } Index: lucene/contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java --- lucene/contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java Mon Jun 20 10:57:22 2011 -0400 @@ -15,7 +15,6 @@ * */ -import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -54,9 +53,6 @@ * @throws IOException */ public void accept(IndexReader indexReader, int documentNumber, String fieldName, TermVectorMapper mapper) throws IOException { - - fieldName = StringHelper.intern(fieldName); - decoratedMapper.decorated = mapper; decoratedMapper.termVectorStored = false; Index: lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java --- lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java Mon Jun 20 10:57:22 2011 -0400 @@ -190,7 +190,7 @@ CharTermAttribute termAtt = 
ts.addAttribute(CharTermAttribute.class); int corpusNumDocs=reader.numDocs(); - Term internSavingTemplateTerm =new Term(f.fieldName); //optimization to avoid constructing new Term() objects + Term protoTerm =new Term(f.fieldName); //optimization to avoid constructing new Term() objects HashSet processedTerms=new HashSet(); ts.reset(); while (ts.incrementToken()) @@ -201,7 +201,7 @@ processedTerms.add(term); ScoreTermQueue variantsQ=new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term float minScore=0; - Term startTerm=internSavingTemplateTerm.createTerm(term); + Term startTerm=protoTerm.createTerm(term); AttributeSource atts = new AttributeSource(); MaxNonCompetitiveBoostAttribute maxBoostAtt = atts.addAttribute(MaxNonCompetitiveBoostAttribute.class); Index: lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java --- lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java Mon Jun 20 10:57:22 2011 -0400 @@ -71,7 +71,7 @@ DocsEnum docs = null; for (Iterator iter = terms.iterator(); iter.hasNext();) { Term term = iter.next(); - if (term.field() != lastField) { + if (!term.field().equals(lastField)) { termsC = fields.terms(term.field()); termsEnum = termsC.iterator(); lastField = term.field(); Index: lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java --- lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java Mon Jun 20 10:57:22 2011 -0400 @@ -67,14 +67,7 @@ ts.reset(); while (ts.incrementToken()) { termAtt.fillBytesRef(); - if (term == null) - { - term = new Term(fieldName, new BytesRef(bytes)); - } else - { -// create from previous to save fieldName.intern 
overhead - term = term.createTerm(new BytesRef(bytes)); - } + term = new Term(fieldName, new BytesRef(bytes)); tf.addTerm(term); } ts.end(); Index: lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java --- lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java Mon Jun 20 10:57:22 2011 -0400 @@ -64,14 +64,7 @@ ts.reset(); while (ts.incrementToken()) { termAtt.fillBytesRef(); - if (term == null) - { - term = new Term(fieldName, new BytesRef(bytes)); - } else - { -// create from previous to save fieldName.intern overhead - term = term.createTerm(new BytesRef(bytes)); - } + term = new Term(fieldName, new BytesRef(bytes)); bq.add(new BooleanClause(new TermQuery(term),BooleanClause.Occur.SHOULD)); } ts.end(); Index: lucene/src/java/org/apache/lucene/document/AbstractField.java --- lucene/src/java/org/apache/lucene/document/AbstractField.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/document/AbstractField.java Mon Jun 20 10:57:22 2011 -0400 @@ -21,7 +21,6 @@ import org.apache.lucene.index.FieldInvertState; // for javadocs import org.apache.lucene.index.values.PerDocFieldValues; import org.apache.lucene.index.values.ValueType; -import org.apache.lucene.util.StringHelper; // for javadocs /** @@ -59,7 +58,7 @@ protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) { if (name == null) throw new NullPointerException("name cannot be null"); - this.name = StringHelper.intern(name); // field names are interned + this.name = name; this.isStored = store.isStored(); this.isIndexed = index.isIndexed(); @@ -109,7 +108,7 @@ return boost; } - /** Returns the name of the field as an interned string. + /** Returns the name of the field. * For example "date", "title", "body", ... 
*/ public String name() { return name; } Index: lucene/src/java/org/apache/lucene/document/Field.java --- lucene/src/java/org/apache/lucene/document/Field.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/document/Field.java Mon Jun 20 10:57:22 2011 -0400 @@ -21,7 +21,6 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.util.StringHelper; /** A field is a section of a Document. Each field has two parts, a name and a @@ -367,28 +366,6 @@ * */ public Field(String name, String value, Store store, Index index, TermVector termVector) { - this(name, true, value, store, index, termVector); - } - - /** - * Create a field by specifying its name, value and how it will - * be saved in the index. - * - * @param name The name of the field - * @param internName Whether to .intern() name or not - * @param value The string to process - * @param store Whether value should be stored in the index - * @param index Whether the field should be indexed, and if so, if it should - * be tokenized before indexing - * @param termVector Whether term vector should be stored - * @throws NullPointerException if name or value is null - * @throws IllegalArgumentException in any of the following situations: - *
<ul>
- * <li>the field is neither stored nor indexed</li>
- * <li>the field is not indexed but termVector is TermVector.YES</li>
- * </ul>
- */ - public Field(String name, boolean internName, String value, Store store, Index index, TermVector termVector) { if (name == null) throw new NullPointerException("name cannot be null"); if (value == null) @@ -402,9 +379,6 @@ throw new IllegalArgumentException("cannot store term vector information " + "for a field that is not indexed"); - if (internName) // field names are optionally interned - name = StringHelper.intern(name); - this.name = name; this.fieldsData = value; @@ -422,7 +396,7 @@ setStoreTermVector(termVector); } - + /** * Create a tokenized and indexed field that is not stored. Term vectors will * not be stored. The Reader is read only when the Document is added to the index, @@ -454,7 +428,7 @@ if (reader == null) throw new NullPointerException("reader cannot be null"); - this.name = StringHelper.intern(name); // field names are interned + this.name = name; this.fieldsData = reader; this.isStored = false; @@ -500,7 +474,7 @@ if (tokenStream == null) throw new NullPointerException("tokenStream cannot be null"); - this.name = StringHelper.intern(name); // field names are interned + this.name = name; this.fieldsData = null; this.tokenStream = tokenStream; @@ -540,7 +514,7 @@ if (value == null) throw new IllegalArgumentException("value cannot be null"); - this.name = StringHelper.intern(name); // field names are interned + this.name = name; fieldsData = value; isStored = true; Index: lucene/src/java/org/apache/lucene/document/Fieldable.java --- lucene/src/java/org/apache/lucene/document/Fieldable.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/document/Fieldable.java Mon Jun 20 10:57:22 2011 -0400 @@ -71,7 +71,7 @@ */ float getBoost(); - /** Returns the name of the field as an interned string. + /** Returns the name of the field. * For example "date", "title", "body", ... 
*/ String name(); Index: lucene/src/java/org/apache/lucene/index/BufferedDeletes.java --- lucene/src/java/org/apache/lucene/index/BufferedDeletes.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/index/BufferedDeletes.java Mon Jun 20 10:57:22 2011 -0400 @@ -47,11 +47,12 @@ key, Integer val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Term is object w/ String field and String text (OBJ_HEADER + 2*POINTER). - We don't count Term's field since it's interned. + Term's field is String (OBJ_HEADER + 4*INT + POINTER + + OBJ_HEADER + string.length*CHAR). Term's text is String (OBJ_HEADER + 4*INT + POINTER + OBJ_HEADER + string.length*CHAR). Integer is OBJ_HEADER + INT. */ - final static int BYTES_PER_DEL_TERM = 8*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 5*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 6*RamUsageEstimator.NUM_BYTES_INT; + final static int BYTES_PER_DEL_TERM = 9*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 7*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 10*RamUsageEstimator.NUM_BYTES_INT; /* Rough logic: del docIDs are List. Say list allocates ~2X size (2*POINTER). 
Integer is OBJ_HEADER @@ -189,7 +190,7 @@ terms.put(term, Integer.valueOf(docIDUpto)); numTermDeletes.incrementAndGet(); if (current == null) { - bytesUsed.addAndGet(BYTES_PER_DEL_TERM + term.bytes.length); + bytesUsed.addAndGet(BYTES_PER_DEL_TERM + term.bytes.length + (RamUsageEstimator.NUM_BYTES_CHAR * term.field().length())); } } Index: lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java --- lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java Mon Jun 20 10:57:22 2011 -0400 @@ -380,7 +380,7 @@ // Since we visit terms sorted, we gain performance // by re-using the same TermsEnum and seeking only // forwards - if (term.field() != currentField) { + if (!term.field().equals(currentField)) { assert currentField == null || currentField.compareTo(term.field()) < 0; currentField = term.field(); Terms terms = fields.terms(currentField); Index: lucene/src/java/org/apache/lucene/index/FieldInfos.java --- lucene/src/java/org/apache/lucene/index/FieldInfos.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/index/FieldInfos.java Mon Jun 20 10:57:22 2011 -0400 @@ -36,7 +36,6 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.CodecUtil; -import org.apache.lucene.util.StringHelper; /** Access to the Fieldable Info file that describes document fields and whether or * not they are indexed. Each segment has a separate Fieldable Info file. 
Objects @@ -475,12 +474,11 @@ boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValuesType) { // don't check modifiable here since we use that to initially build up FIs - name = StringHelper.intern(name); if (globalFieldNumbers != null) { globalFieldNumbers.setIfNotSet(fieldNumber, name); } final FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, storePositionWithTermVector, - storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValuesType); + storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValuesType); putInternal(fi); return fi; } @@ -659,7 +657,7 @@ final int size = input.readVInt(); //read in the size for (int i = 0; i < size; i++) { - String name = StringHelper.intern(input.readString()); + String name = input.readString(); // if this is a previous format codec 0 will be preflex! final int fieldNumber = format <= FORMAT_PER_FIELD_CODEC? input.readInt():i; final int codecId = format <= FORMAT_PER_FIELD_CODEC? input.readInt():0; Index: lucene/src/java/org/apache/lucene/index/FieldsEnum.java --- lucene/src/java/org/apache/lucene/index/FieldsEnum.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/index/FieldsEnum.java Mon Jun 20 10:57:22 2011 -0400 @@ -43,10 +43,8 @@ return atts; } - /** Increments the enumeration to the next field. The - * returned field is always interned, so simple == - * comparison is allowed. Returns null when there are no - * more fields.*/ + /** Increments the enumeration to the next field. Returns + * null when there are no more fields.*/ public abstract String next() throws IOException; /** Get {@link TermsEnum} for the current field. 
You Index: lucene/src/java/org/apache/lucene/index/FieldsReader.java --- lucene/src/java/org/apache/lucene/index/FieldsReader.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/index/FieldsReader.java Mon Jun 20 10:57:22 2011 -0400 @@ -364,7 +364,6 @@ Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize); Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector); f = new Field(fi.name, // name - false, fieldsStream.readString(), // read value Field.Store.YES, index, Index: lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java --- lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java Mon Jun 20 10:57:22 2011 -0400 @@ -33,10 +33,11 @@ /* Rough logic: Term is object w/ String field and String text (OBJ_HEADER + 2*POINTER). - We don't count Term's field since it's interned. Term's text is String (OBJ_HEADER + 4*INT + POINTER + - OBJ_HEADER + string.length*CHAR). */ - final static int BYTES_PER_DEL_TERM = 3*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 3*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 4*RamUsageEstimator.NUM_BYTES_INT; + OBJ_HEADER + text.length*CHAR). + Term's field is String (OBJ_HEADER + 4*INT + POINTER + + OBJ_HEADER + field.length*CHAR). */ + final static int BYTES_PER_DEL_TERM = 4*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 4*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 8*RamUsageEstimator.NUM_BYTES_INT; /* Query we often undercount (say 24 bytes), plus int. */ final static int BYTES_PER_DEL_QUERY = RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_INT + 24; @@ -70,6 +71,8 @@ queryLimits[upto] = ent.getValue(); upto++; } + // nocommit -- not right? fails to account for the + // char[] for field & text of each term? 
bytesUsed = terms.length * BYTES_PER_DEL_TERM + queries.length * BYTES_PER_DEL_QUERY; numTermDeletes = deletes.numTermDeletes.get(); } Index: lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java --- lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java Mon Jun 20 10:57:22 2011 -0400 @@ -94,7 +94,7 @@ if (queue.size() > 0) { while(true) { top[numTop++] = queue.pop(); - if (queue.size() == 0 || (queue.top()).current != top[0].current) { + if (queue.size() == 0 || !(queue.top()).current.equals(top[0].current)) { break; } } Index: lucene/src/java/org/apache/lucene/index/Term.java --- lucene/src/java/org/apache/lucene/index/Term.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/index/Term.java Mon Jun 20 10:57:22 2011 -0400 @@ -20,12 +20,11 @@ import java.util.Comparator; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.StringHelper; /** A Term represents a word from text. This is the unit of search. It is composed of two elements, the text of the word, as a string, and the name of - the field that the text occurred in, an interned string. + the field that the text occurred in. Note that terms may represent more than words from text fields, but also things like dates, email addresses, urls, etc. */ @@ -37,13 +36,14 @@ /** Constructs a Term with the given field and bytes. *

<p>Note that a null field or null bytes value results in undefined * behavior for most Lucene APIs that accept a Term parameter. + * *

WARNING: the provided BytesRef is not copied, but used directly. * Therefore the bytes should not be modified after construction, for * example, you should clone a copy rather than pass reused bytes from * a TermsEnum. */ public Term(String fld, BytesRef bytes) { - field = fld == null ? null : StringHelper.intern(fld); + field = fld; this.bytes = bytes; } @@ -61,28 +61,10 @@ * @param fld */ public Term(String fld) { - this(fld, new BytesRef(), true); + this(fld, new BytesRef()); } - /** - * WARNING: the provided BytesRef is not copied, but used directly. - * Therefore the bytes should not be modified after construction, for - * example, you should clone a copy rather than pass reused bytes from - * a TermsEnum. - * - * @lucene.experimental - */ - public Term(String fld, BytesRef bytes, boolean intern) { - field = intern ? StringHelper.intern(fld) : fld; // field names are interned - this.bytes = bytes; // unless already known to be - } - - /** @lucene.experimental */ - public Term(String fld, String text, boolean intern) { - this(fld, new BytesRef(text), intern); - } - - /** Returns the field of this term, an interned string. The field indicates + /** Returns the field of this term. The field indicates the part of a document which this term came from. */ public final String field() { return field; } @@ -95,8 +77,8 @@ public final BytesRef bytes() { return bytes; } /** - * Optimized construction of new Terms by reusing same field as this Term - * - avoids field.intern() overhead + * Optimized construction of new Terms by reusing same field as this Term. + * *

WARNING: the provided BytesRef is not copied, but used directly. * Therefore the bytes should not be modified after construction, for * example, you should clone a copy rather than pass reused bytes from @@ -104,20 +86,17 @@ * @param bytes The bytes of the new term (field is implicitly same as this Term instance) * @return A new Term */ - public Term createTerm(BytesRef bytes) - { - return new Term(field,bytes,false); + public Term createTerm(BytesRef bytes) { + return new Term(field, bytes); } /** * Optimized construction of new Terms by reusing same field as this Term - * - avoids field.intern() overhead * @param text The text of the new term (field is implicitly same as this Term instance) * @return A new Term */ - public Term createTerm(String text) - { - return new Term(field,text,false); + public Term createTerm(String text) { + return new Term(field, text); } @Override @@ -157,26 +136,29 @@ The ordering of terms is first by field, then by text.*/ public final int compareTo(Term other) { - if (field == other.field) // fields are interned + if (field.equals(other.field)) { return bytes.compareTo(other.bytes); - else + } else { return field.compareTo(other.field); + } } @Deprecated private static final Comparator legacyComparator = BytesRef.getUTF8SortedAsUTF16Comparator(); + // nocommit -- make private to preflex/TestSurrogates /** * @deprecated (4.0) For internal backwards compatibility use only * @lucene.internal */ @Deprecated public final int compareToUTF16(Term other) { - if (field == other.field) // fields are interned + if (field.equals(other.field)) { return legacyComparator.compare(this.bytes, other.bytes); - else + } else { return field.compareTo(other.field); + } } /** Index: lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java --- lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java Mon Jun 20 10:57:22 2011 -0400 
@@ -91,7 +91,7 @@ @Override public boolean equals(Object _other) { FieldAndTerm other = (FieldAndTerm) _other; - return other.field == field && term.bytesEquals(other.term); + return other.field.equals(field) && term.bytesEquals(other.term); } @Override Index: lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java --- lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Mon Jun 20 10:57:22 2011 -0400 @@ -270,6 +270,7 @@ private class PreTermsEnum extends TermsEnum { private SegmentTermEnum termEnum; private FieldInfo fieldInfo; + private String internedFieldName; private boolean skipNext; private BytesRef current; @@ -343,7 +344,7 @@ // Cannot be null (or move to next field) because at // "worst" it'd seek to the same term we are on now, // unless we are being called from seek - if (t2 == null || t2.field() != fieldInfo.name) { + if (t2 == null || t2.field() != internedFieldName) { return false; } @@ -467,7 +468,7 @@ // We could hit EOF or different field since this // was a seek "forward": - if (t2 != null && t2.field() == fieldInfo.name) { + if (t2 != null && t2.field() == internedFieldName) { if (DEBUG_SURROGATES) { System.out.println(" got term=" + UnicodeUtil.toHexString(t2.text()) + " " + t2.bytes()); @@ -552,7 +553,7 @@ // current term. // TODO: can we avoid this copy? 
- if (termEnum.term() == null || termEnum.term().field() != fieldInfo.name) { + if (termEnum.term() == null || termEnum.term().field() != internedFieldName) { scratchTerm.length = 0; } else { scratchTerm.copy(termEnum.term().bytes()); @@ -659,7 +660,7 @@ // EOF or a different field: boolean matches; - if (t2 != null && t2.field() == fieldInfo.name) { + if (t2 != null && t2.field() == internedFieldName) { final BytesRef b2 = t2.bytes(); assert b2.offset == 0; if (b2.length >= upTo+3 && isHighBMPChar(b2.bytes, upTo)) { @@ -713,7 +714,8 @@ void reset(FieldInfo fieldInfo) throws IOException { //System.out.println("pff.reset te=" + termEnum); this.fieldInfo = fieldInfo; - protoTerm = new Term(fieldInfo.name); + internedFieldName = fieldInfo.name.intern(); + protoTerm = new Term(internedFieldName); if (termEnum == null) { termEnum = getTermsDict().terms(protoTerm); seekTermEnum = getTermsDict().terms(protoTerm); @@ -726,7 +728,7 @@ unicodeSortOrder = sortTermsByUnicode(); final Term t = termEnum.term(); - if (t != null && t.field() == fieldInfo.name) { + if (t != null && t.field() == internedFieldName) { newSuffixStart = 0; prevTerm.length = 0; surrogateDance(); @@ -769,7 +771,7 @@ final Term t = termEnum.term(); - if (t != null && t.field() == fieldInfo.name && term.bytesEquals(t.bytes())) { + if (t != null && t.field() == internedFieldName && term.bytesEquals(t.bytes())) { // If we found an exact match, no need to do the // surrogate dance if (DEBUG_SURROGATES) { @@ -777,7 +779,7 @@ } current = t.bytes(); return SeekStatus.FOUND; - } else if (t == null || t.field() != fieldInfo.name) { + } else if (t == null || t.field() != internedFieldName) { // TODO: maybe we can handle this like the next() // into null? set term as prevTerm then dance? 
@@ -840,8 +842,9 @@ surrogateDance(); final Term t2 = termEnum.term(); - if (t2 == null || t2.field() != fieldInfo.name) { - assert t2 == null || !t2.field().equals(fieldInfo.name); // make sure fields are in fact interned + if (t2 == null || t2.field() != internedFieldName) { + // PreFlex codec interns field names; verify: + assert t2 == null || !t2.field().equals(internedFieldName); current = null; return SeekStatus.END; } else { @@ -885,7 +888,8 @@ skipNext = false; if (termEnum.term() == null) { return null; - } else if (termEnum.term().field() != fieldInfo.name) { + // PreFlex codec interns field names: + } else if (termEnum.term().field() != internedFieldName) { return null; } else { return current = termEnum.term().bytes(); @@ -895,15 +899,16 @@ // TODO: can we use STE's prevBuffer here? prevTerm.copy(termEnum.term().bytes()); - if (termEnum.next() && termEnum.term().field() == fieldInfo.name) { + if (termEnum.next() && termEnum.term().field() == internedFieldName) { newSuffixStart = termEnum.newSuffixStart; if (DEBUG_SURROGATES) { System.out.println(" newSuffixStart=" + newSuffixStart); } surrogateDance(); final Term t = termEnum.term(); - if (t == null || t.field() != fieldInfo.name) { - assert t == null || !t.field().equals(fieldInfo.name); // make sure fields are in fact interned + if (t == null || t.field() != internedFieldName) { + // PreFlex codec interns field names; verify: + assert t == null || !t.field().equals(internedFieldName); current = null; } else { current = t.bytes(); @@ -920,8 +925,9 @@ surrogateDance(); final Term t = termEnum.term(); - if (t == null || t.field() != fieldInfo.name) { - assert t == null || !t.field().equals(fieldInfo.name); // make sure fields are in fact interned + if (t == null || t.field() != internedFieldName) { + // PreFlex codec interns field names; verify: + assert t == null || !t.field().equals(internedFieldName); return null; } else { current = t.bytes(); Index: 
lucene/src/java/org/apache/lucene/index/codecs/preflex/TermBuffer.java --- lucene/src/java/org/apache/lucene/index/codecs/preflex/TermBuffer.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/index/codecs/preflex/TermBuffer.java Mon Jun 20 10:57:22 2011 -0400 @@ -37,12 +37,17 @@ private BytesRef bytes = new BytesRef(10); + // Cannot be -1 since (strangely) we write that + // fieldNumber into index for first indexed term: + private int currentFieldNumber = -2; + private static final Comparator utf8AsUTF16Comparator = BytesRef.getUTF8SortedAsUTF16Comparator(); int newSuffixStart; // only valid right after .read is called public int compareTo(TermBuffer other) { if (field == other.field) // fields are interned + // (only by PreFlex codec) return utf8AsUTF16Comparator.compare(bytes, other.bytes); else return field.compareTo(other.field); @@ -59,7 +64,13 @@ } bytes.length = totalLength; input.readBytes(bytes.bytes, newSuffixStart, length); - this.field = fieldInfos.fieldName(input.readVInt()); + final int fieldNumber = input.readVInt(); + if (fieldNumber != currentFieldNumber) { + currentFieldNumber = fieldNumber; + field = fieldInfos.fieldName(currentFieldNumber).intern(); + } else { + assert field.equals(fieldInfos.fieldName(fieldNumber)): "currentFieldNumber=" + currentFieldNumber + " field=" + field + " vs " + fieldInfos.fieldName(fieldNumber); + } } public void set(Term term) { @@ -68,12 +79,14 @@ return; } bytes.copy(term.bytes()); - field = term.field(); + field = term.field().intern(); + currentFieldNumber = -1; this.term = term; } public void set(TermBuffer other) { field = other.field; + currentFieldNumber = other.currentFieldNumber; // dangerous to copy Term over, since the underlying // BytesRef could subsequently be modified: term = null; @@ -83,6 +96,7 @@ public void reset() { field = null; term = null; + currentFieldNumber= -1; } public Term toTerm() { @@ -90,8 +104,7 @@ return null; if (term == null) { - term = new Term(field, new 
BytesRef(bytes), false); - //term = new Term(field, bytes, false); + term = new Term(field, new BytesRef(bytes)); } return term; Index: lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java --- lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Mon Jun 20 10:57:22 2011 -0400 @@ -30,7 +30,6 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.CharsRef; -import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.fst.Builder; import org.apache.lucene.util.fst.BytesRefFSTEnum; @@ -102,9 +101,7 @@ return null; } if (scratch.startsWith(FIELD)) { - String field = StringHelper.intern(new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8")); - current = field; - return field; + return current = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8"); } } } Index: lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java --- lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java Mon Jun 20 10:57:22 2011 -0400 @@ -42,7 +42,6 @@ import org.apache.lucene.search.cache.CachedArray.LongValues; import org.apache.lucene.search.cache.CachedArray.ShortValues; import org.apache.lucene.util.FieldCacheSanityChecker; -import org.apache.lucene.util.StringHelper; /** * Expert: The default cache implementation, storing all values in memory. @@ -246,7 +245,7 @@ /** Creates one of these objects for a custom comparator/parser. 
*/ Entry (String field, EntryCreator custom) { - this.field = StringHelper.intern(field); + this.field = field; this.creator = custom; } @@ -255,7 +254,7 @@ public boolean equals (Object o) { if (o instanceof Entry) { Entry other = (Entry) o; - if (other.field == field) { + if (other.field.equals(field)) { if (other.creator == null) { if (creator == null) return true; } else if (other.creator.equals (creator)) { Index: lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java --- lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java Mon Jun 20 10:57:22 2011 -0400 @@ -88,7 +88,7 @@ field = terms[0].field(); for (int i = 0; i < terms.length; i++) { - if (terms[i].field() != field) { + if (!terms[i].field().equals(field)) { throw new IllegalArgumentException( "All phrase terms must be in the same field (" + field + "): " + terms[i]); Index: lucene/src/java/org/apache/lucene/search/PhraseQuery.java --- lucene/src/java/org/apache/lucene/search/PhraseQuery.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/search/PhraseQuery.java Mon Jun 20 10:57:22 2011 -0400 @@ -85,14 +85,15 @@ * @param position */ public void add(Term term, int position) { - if (terms.size() == 0) - field = term.field(); - else if (term.field() != field) - throw new IllegalArgumentException("All phrase terms must be in the same field: " + term); + if (terms.size() == 0) { + field = term.field(); + } else if (!term.field().equals(field)) { + throw new IllegalArgumentException("All phrase terms must be in the same field: " + term); + } - terms.add(term); - positions.add(Integer.valueOf(position)); - if (position > maxPosition) maxPosition = position; + terms.add(term); + positions.add(Integer.valueOf(position)); + if (position > maxPosition) maxPosition = position; } /** Returns the set of terms in this phrase. 
*/ Index: lucene/src/java/org/apache/lucene/search/SortField.java --- lucene/src/java/org/apache/lucene/search/SortField.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/search/SortField.java Mon Jun 20 10:57:22 2011 -0400 @@ -21,8 +21,8 @@ import java.util.Comparator; import org.apache.lucene.search.cache.*; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.StringHelper; -import org.apache.lucene.util.BytesRef; // TODO(simonw) -- for cleaner transition, maybe we should make // a new SortField that subclasses this one and always uses @@ -166,7 +166,7 @@ if (field == null) { throw new IllegalArgumentException("field can only be null when type is SCORE or DOC"); } - this.field = StringHelper.intern(field); + this.field = field; this.reverse = reverse; if (parser instanceof FieldCache.IntParser) { @@ -205,7 +205,7 @@ */ public SortField( CachedArrayCreator creator, boolean reverse ) { - this.field = StringHelper.intern(creator.field); + this.field = creator.field; this.reverse = reverse; this.creator = creator; this.type = creator.getSortTypeID(); @@ -253,7 +253,7 @@ if (type != SCORE && type != DOC) throw new IllegalArgumentException("field can only be null when type is SCORE or DOC"); } else { - this.field = StringHelper.intern(field); + this.field = field; } if( creator != null ) { @@ -381,7 +381,7 @@ if (!(o instanceof SortField)) return false; final SortField other = (SortField)o; return ( - other.field == this.field // field is always interned + StringHelper.equals(other.field, this.field) && other.type == this.type && other.reverse == this.reverse && (other.comparatorSource == null ? 
this.comparatorSource == null : other.comparatorSource.equals(this.comparatorSource)) Index: lucene/src/java/org/apache/lucene/search/cache/DocTermsCreator.java --- lucene/src/java/org/apache/lucene/search/cache/DocTermsCreator.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/search/cache/DocTermsCreator.java Mon Jun 20 10:57:22 2011 -0400 @@ -29,7 +29,6 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PagedBytes; -import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.packed.GrowableWriter; import org.apache.lucene.util.packed.PackedInts; @@ -66,7 +65,6 @@ @Override public DocTerms create(IndexReader reader) throws IOException { - String field = StringHelper.intern(this.field); // TODO?? necessary? Terms terms = MultiFields.getTerms(reader, field); final boolean fasterButMoreRAM = hasOption( FASTER_BUT_MORE_RAM ); Index: lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java --- lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java Mon Jun 20 10:57:22 2011 -0400 @@ -34,7 +34,6 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PagedBytes; -import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.packed.GrowableWriter; import org.apache.lucene.util.packed.PackedInts; @@ -70,7 +69,6 @@ @Override public DocTermsIndex create(IndexReader reader) throws IOException { - String field = StringHelper.intern(this.field); // TODO?? necessary? 
Terms terms = MultiFields.getTerms(reader, field); final boolean fasterButMoreRAM = hasOption(FASTER_BUT_MORE_RAM); Index: lucene/src/java/org/apache/lucene/util/FieldCacheSanityChecker.java --- lucene/src/java/org/apache/lucene/util/FieldCacheSanityChecker.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/util/FieldCacheSanityChecker.java Mon Jun 20 10:57:22 2011 -0400 @@ -208,7 +208,7 @@ if (seen.contains(rf)) continue; - List kids = getAllDecendentReaderKeys(rf.readerKey); + List kids = getAllDescendentReaderKeys(rf.readerKey); for (Object kidKey : kids) { ReaderField kid = new ReaderField(kidKey, rf.fieldName); @@ -252,7 +252,7 @@ badness = badEntries.toArray(badness); insanity.add(new Insanity(InsanityType.SUBREADER, - "Found caches for decendents of " + + "Found caches for descendants of " + parent.toString(), badness)); } @@ -266,7 +266,7 @@ * the hierarchy of subReaders building up a list of the objects * returned by obj.getFieldCacheKey() */ - private List getAllDecendentReaderKeys(Object seed) { + private List getAllDescendentReaderKeys(Object seed) { List all = new ArrayList(17); // will grow as we iter all.add(seed); for (int i = 0; i < all.size(); i++) { Index: lucene/src/java/org/apache/lucene/util/SimpleStringInterner.java --- lucene/src/java/org/apache/lucene/util/SimpleStringInterner.java Mon Jun 20 10:31:20 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,85 +0,0 @@ -package org.apache.lucene.util; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/** - * Simple lockless and memory barrier free String intern cache that is guaranteed - * to return the same String instance as String.intern() - * does. - * - * @lucene.internal - */ -public class SimpleStringInterner extends StringInterner { - - private static class Entry { - final private String str; - final private int hash; - private Entry next; - private Entry(String str, int hash, Entry next) { - this.str = str; - this.hash = hash; - this.next = next; - } - } - - private final Entry[] cache; - private final int maxChainLength; - - /** - * @param tableSize Size of the hash table, should be a power of two. - * @param maxChainLength Maximum length of each bucket, after which the oldest item inserted is dropped. - */ - public SimpleStringInterner(int tableSize, int maxChainLength) { - cache = new Entry[Math.max(1,BitUtil.nextHighestPowerOfTwo(tableSize))]; - this.maxChainLength = Math.max(2,maxChainLength); - } - - @Override - public String intern(String s) { - int h = s.hashCode(); - // In the future, it may be worth augmenting the string hash - // if the lower bits need better distribution. 
- int slot = h & (cache.length-1); - - Entry first = this.cache[slot]; - Entry nextToLast = null; - - int chainLength = 0; - - for(Entry e=first; e!=null; e=e.next) { - if (e.hash == h && (e.str == s || e.str.compareTo(s)==0)) { - // if (e.str == s || (e.hash == h && e.str.compareTo(s)==0)) { - return e.str; - } - - chainLength++; - if (e.next != null) { - nextToLast = e; - } - } - - // insertion-order cache: add new entry at head - s = s.intern(); - this.cache[slot] = new Entry(s, h, first); - if (chainLength >= maxChainLength) { - // prune last entry - nextToLast.next = null; - } - return s; - } -} \ No newline at end of file Index: lucene/src/java/org/apache/lucene/util/StringHelper.java --- lucene/src/java/org/apache/lucene/util/StringHelper.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/java/org/apache/lucene/util/StringHelper.java Mon Jun 20 10:57:22 2011 -0400 @@ -27,17 +27,6 @@ * @lucene.internal */ public abstract class StringHelper { - /** - * Expert: - * The StringInterner implementation used by Lucene. - * This shouldn't be changed to an incompatible implementation after other Lucene APIs have been used. - */ - public static StringInterner interner = new SimpleStringInterner(1024,8); - - /** Return the same string object for all equal strings */ - public static String intern(String s) { - return interner.intern(s); - } /** * Compares two byte[] arrays, element by element, and returns the @@ -95,4 +84,12 @@ return 0; } }; + + public static boolean equals(String s1, String s2) { + if (s1 == null) { + return s2 == null; + } else { + return s1.equals(s2); + } + } } Index: lucene/src/java/org/apache/lucene/util/StringInterner.java --- lucene/src/java/org/apache/lucene/util/StringInterner.java Mon Jun 20 10:31:20 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ -package org.apache.lucene.util; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Subclasses of StringInterner are required to - * return the same single String object for all equal strings. - * Depending on the implementation, this may not be - * the same object returned as String.intern(). - * - * This StringInterner base class simply delegates to String.intern(). - */ -public class StringInterner { - /** Returns a single object instance for each equal string. */ - public String intern(String s) { - return s.intern(); - } - - /** Returns a single object instance for each equal string. */ - public String intern(char[] arr, int offset, int len) { - return intern(new String(arr, offset, len)); - } -} Index: lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java --- lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java Mon Jun 20 10:57:22 2011 -0400 @@ -713,8 +713,12 @@ throw e; } + if (insanity.length != 0) { + reportAdditionalFailureInfo(); + } + assertEquals(msg + ": Insane FieldCache usage(s) found", - 0, insanity.length); + 0, insanity.length); insanity = null; } finally { @@ -1104,11 +1108,20 @@ /** Returns a new field instance, using the specified random. 
* See {@link #newField(String, String, Field.Store, Field.Index, Field.TermVector)} for more information */ public static Field newField(Random random, String name, String value, Store store, Index index, TermVector tv) { + + // nocommit + name = new String(name); + if (usually(random)) { // most of the time, don't modify the params return new Field(name, value, store, index, tv); } + if (random.nextBoolean()) { + // tickle any code still relying on field names being interned: + name = new String(name); + } + if (!index.isIndexed()) return new Field(name, value, store, index, tv); Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java --- lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java Mon Jun 20 10:57:22 2011 -0400 @@ -940,7 +940,7 @@ count++; } } - assertTrue("flush happened too quickly during " + (doIndexing ? "indexing" : "deleting") + " count=" + count, count > 3000); + assertTrue("flush happened too quickly during " + (doIndexing ? 
"indexing" : "deleting") + " count=" + count, count > 2500); } w.close(); dir.close(); Index: lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java --- lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java Mon Jun 20 10:57:22 2011 -0400 @@ -312,7 +312,6 @@ int[] r2r1 = new int[r2.maxDoc()]; // r2 id to r1 id mapping // create mapping from id2 space to id2 based on idField - idField = StringHelper.intern(idField); final Fields f1 = MultiFields.getFields(r1); if (f1 == null) { // make sure r2 is empty Index: lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java --- lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java Mon Jun 20 10:31:20 2011 -0400 +++ lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java Mon Jun 20 10:57:22 2011 -0400 @@ -171,7 +171,7 @@ break; } term = fieldTerms.get(1+spot+i); - if (term.field() != field) { + if (!term.field().equals(field)) { assertNull(te.next()); break; } else { @@ -224,7 +224,7 @@ spot = -spot - 1; - if (spot == fieldTerms.size() || fieldTerms.get(spot).field() != field) { + if (spot == fieldTerms.size() || !fieldTerms.get(spot).field().equals(field)) { assertEquals(TermsEnum.SeekStatus.END, te.seek(tx.bytes())); } else { assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(tx.bytes())); @@ -247,7 +247,7 @@ break; } Term term = fieldTerms.get(1+spot+i); - if (term.field() != field) { + if (!term.field().equals(field)) { assertNull(te.next()); break; } else { Index: lucene/src/test/org/apache/lucene/util/TestStringIntern.java --- lucene/src/test/org/apache/lucene/util/TestStringIntern.java Mon Jun 20 10:31:20 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,89 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.util; -import java.util.Random; - -public class TestStringIntern extends LuceneTestCase { - String[] testStrings; - String[] internedStrings; - - private String randStr(int len) { - char[] arr = new char[len]; - for (int i=0; i getWordsIterator() { Index: solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java --- solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java Mon Jun 20 10:31:20 2011 -0400 +++ solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java Mon Jun 20 10:57:22 2011 -0400 @@ -46,7 +46,6 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; -import org.apache.lucene.util.StringHelper; import org.apache.solr.cloud.ZkController; import org.apache.lucene.util.BytesRef; import org.apache.solr.common.SolrException; @@ -161,7 +160,7 @@ throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "QueryElevationComponent requires the schema to have a uniqueKeyField implemented using StrField" ); } - idField = StringHelper.intern(sf.getName()); + idField = sf.getName(); forceElevation = initArgs.getBool( QueryElevationParams.FORCE_ELEVATION, forceElevation ); try { Index: 
solr/src/java/org/apache/solr/request/SimpleFacets.java --- solr/src/java/org/apache/solr/request/SimpleFacets.java Mon Jun 20 10:31:20 2011 -0400 +++ solr/src/java/org/apache/solr/request/SimpleFacets.java Mon Jun 20 10:57:22 2011 -0400 @@ -22,7 +22,6 @@ import org.apache.lucene.search.*; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; -import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.packed.Direct16; import org.apache.lucene.util.packed.Direct32; @@ -674,7 +673,7 @@ if (deState==null) { deState = new SolrIndexSearcher.DocsEnumState(); - deState.fieldName = StringHelper.intern(field); + deState.fieldName = field; deState.deletedDocs = MultiFields.getDeletedDocs(r); deState.termsEnum = termsEnum; deState.docsEnum = docsEnum; Index: solr/src/java/org/apache/solr/request/UnInvertedField.java --- solr/src/java/org/apache/solr/request/UnInvertedField.java Mon Jun 20 10:31:20 2011 -0400 +++ solr/src/java/org/apache/solr/request/UnInvertedField.java Mon Jun 20 10:57:22 2011 -0400 @@ -23,7 +23,6 @@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; -import org.apache.lucene.util.StringHelper; import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.SolrException; @@ -123,7 +122,7 @@ if (deState == null) { deState = new SolrIndexSearcher.DocsEnumState(); - deState.fieldName = StringHelper.intern(field); + deState.fieldName = field; // deState.termsEnum = te.tenum; deState.termsEnum = te; // TODO: check for MultiTermsEnum in SolrIndexSearcher could now fail? 
deState.docsEnum = docsEnum; Index: solr/src/java/org/apache/solr/response/JSONResponseWriter.java --- solr/src/java/org/apache/solr/response/JSONResponseWriter.java Mon Jun 20 10:31:20 2011 -0400 +++ solr/src/java/org/apache/solr/response/JSONResponseWriter.java Mon Jun 20 10:57:22 2011 -0400 @@ -27,7 +27,6 @@ import java.util.Set; import org.apache.lucene.document.Fieldable; -import org.apache.lucene.util.StringHelper; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.util.NamedList; @@ -75,7 +74,7 @@ public JSONWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) { super(writer, req, rsp); - namedListStyle = StringHelper.intern(req.getParams().get(JSON_NL_STYLE, JSON_NL_FLAT)); + namedListStyle = req.getParams().get(JSON_NL_STYLE, JSON_NL_FLAT).intern(); wrapperFunction = req.getParams().get(JSON_WRAPPER_FUNCTION); } Index: solr/src/java/org/apache/solr/search/JoinQParserPlugin.java --- solr/src/java/org/apache/solr/search/JoinQParserPlugin.java Mon Jun 20 10:31:20 2011 -0400 +++ solr/src/java/org/apache/solr/search/JoinQParserPlugin.java Mon Jun 20 10:57:22 2011 -0400 @@ -22,7 +22,6 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.OpenBitSet; -import org.apache.lucene.util.StringHelper; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; @@ -290,14 +289,14 @@ Bits toDeletedDocs = fromSearcher == toSearcher ? 
fromDeletedDocs : MultiFields.getDeletedDocs(toSearcher.getIndexReader()); fromDeState = new SolrIndexSearcher.DocsEnumState(); - fromDeState.fieldName = StringHelper.intern(fromField); + fromDeState.fieldName = fromField; fromDeState.deletedDocs = fromDeletedDocs; fromDeState.termsEnum = termsEnum; fromDeState.docsEnum = null; fromDeState.minSetSizeCached = minDocFreqFrom; toDeState = new SolrIndexSearcher.DocsEnumState(); - toDeState.fieldName = StringHelper.intern(toField); + toDeState.fieldName = toField; toDeState.deletedDocs = toDeletedDocs; toDeState.termsEnum = toTermsEnum; toDeState.docsEnum = null; Index: solr/src/java/org/apache/solr/search/SolrIndexSearcher.java --- solr/src/java/org/apache/solr/search/SolrIndexSearcher.java Mon Jun 20 10:31:20 2011 -0400 +++ solr/src/java/org/apache/solr/search/SolrIndexSearcher.java Mon Jun 20 10:57:22 2011 -0400 @@ -715,7 +715,7 @@ TermQuery key = null; if (useCache) { - key = new TermQuery(new Term(deState.fieldName, new BytesRef(deState.termsEnum.term()), false)); + key = new TermQuery(new Term(deState.fieldName, new BytesRef(deState.termsEnum.term()))); DocSet result = filterCache.get(key); if (result != null) return result; } Index: solr/src/java/org/apache/solr/search/function/FileFloatSource.java --- solr/src/java/org/apache/solr/search/function/FileFloatSource.java Mon Jun 20 10:31:20 2011 -0400 +++ solr/src/java/org/apache/solr/search/function/FileFloatSource.java Mon Jun 20 10:57:22 2011 -0400 @@ -35,7 +35,6 @@ import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ReaderUtil; -import org.apache.lucene.util.StringHelper; import org.apache.solr.core.SolrCore; import org.apache.solr.handler.RequestHandlerBase; import org.apache.solr.handler.RequestHandlerUtils; @@ -224,7 +223,7 @@ BufferedReader r = new BufferedReader(new InputStreamReader(is)); - String idName = StringHelper.intern(ffs.keyField.getName()); + String idName = 
ffs.keyField.getName(); FieldType idType = ffs.keyField.getType(); // warning: lucene's termEnum.skipTo() is not optimized... it simply does a next()