Property changes on: . ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/branch_3x:r1060784 Property changes on: lucene ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/branch_3x/lucene:r1060784 Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1060792) +++ lucene/CHANGES.txt (working copy) @@ -362,9 +362,9 @@ * LUCENE-2302: The new interface for term attributes, CharTermAttribute, now implements CharSequence. This requires the toString() methods of CharTermAttribute, deprecated TermAttribute, and Token to return only - the term text and no other attribute contents. - TODO: Point to new attribute inspection API coming with LUCENE-2374. - (Uwe Schindler, Robert Muir) + the term text and no other attribute contents. LUCENE-2374 implements + an attribute reflection API to no longer rely on toString() for attribute + inspection. (Uwe Schindler, Robert Muir) * LUCENE-2372, LUCENE-2389: StandardAnalyzer, KeywordAnalyzer, PerFieldAnalyzerWrapper, WhitespaceTokenizer are now final. Also removed @@ -592,6 +592,23 @@ to ensure that the norm is encoded with your Similarity. (Robert Muir, Mike McCandless) +* LUCENE-2374: Added Attribute reflection API: It's now possible to inspect the + contents of AttributeImpl and AttributeSource using a well-defined API. + This is e.g. used by Solr's AnalysisRequestHandlers to display all attributes + in a structured way. + There are also some backwards incompatible changes in toString() output, + as LUCENE-2302 introduced the CharSequence interface to CharTermAttribute + leading to changed toString() return values. The new API allows getting a + string representation in a well-defined way using a new method + reflectAsString(). 
For backwards compatibility reasons, when toString() + was implemented by implementation subclasses, the default implementation of + AttributeImpl.reflectWith() uses toString()'s output instead to report the + Attribute's properties. Otherwise, reflectWith() uses Java's reflection + (like toString() did before) to get the attribute properties. + In addition, the mandatory equals() and hashCode() are no longer required + for AttributeImpls, but can still be provided (if needed). + (Uwe Schindler) + Bug fixes * LUCENE-2249: ParallelMultiSearcher should shut down thread pool on Index: lucene/MIGRATE.txt =================================================================== --- lucene/MIGRATE.txt (revision 1060792) +++ lucene/MIGRATE.txt (working copy) @@ -328,3 +328,9 @@ * LUCENE-2761: DataInput.readVInt/readVLong and DataOutput.writeVInt/writeVLong are final. If you subclassed this code before to encode variable-length integers in some specialized way, use the Codec API instead. + +* LUCENE-2374: The backwards layer in AttributeImpl was removed. To support correct + reflection of AttributeImpl instances, you have to override reflectWith() to + customize output. toString() is no longer implemented by AttributeImpl, so + if you have overridden toString(), port your customization over to reflectWith(). + reflectAsString() would then return what toString() did before. 
Index: lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java =================================================================== --- lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java (revision 1060792) +++ lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java (working copy) @@ -19,6 +19,7 @@ import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeReflector; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.document.NumericField; // for javadocs @@ -154,18 +155,18 @@ public void clear() { // this attribute has no contents to clear } - + @Override - public boolean equals(Object other) { - return other == this; + public void reflectWith(AttributeReflector reflector) { + final BytesRef bytes = new BytesRef(); + toBytesRef(bytes); + reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes); + reflector.reflect(NumericTermAttribute.class, "shift", getShift()); + reflector.reflect(NumericTermAttribute.class, "rawValue", getRawValue()); + reflector.reflect(NumericTermAttribute.class, "valueSize", getValueSize()); } - + @Override - public int hashCode() { - return System.identityHashCode(this); - } - - @Override public void copyTo(AttributeImpl target) { // this attribute has no contents to copy } Index: lucene/src/java/org/apache/lucene/analysis/Token.java =================================================================== --- lucene/src/java/org/apache/lucene/analysis/Token.java (revision 1060792) +++ lucene/src/java/org/apache/lucene/analysis/Token.java (working copy) @@ -28,6 +28,7 @@ import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeReflector; /** A Token is an occurrence of a term from the text of a field. 
It consists of @@ -588,6 +589,17 @@ } } + @Override + public void reflectWith(AttributeReflector reflector) { + super.reflectWith(reflector); + reflector.reflect(OffsetAttribute.class, "startOffset", startOffset); + reflector.reflect(OffsetAttribute.class, "endOffset", endOffset); + reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement); + reflector.reflect(PayloadAttribute.class, "payload", payload); + reflector.reflect(FlagsAttribute.class, "flags", flags); + reflector.reflect(TypeAttribute.class, "type", type); + } + /** Convenience factory that returns Token as implementation for the basic * attributes and return the default impl (with "Impl" appended) for all other * attributes. Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (revision 1060792) +++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.AttributeReflector; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.UnicodeUtil; @@ -244,6 +245,14 @@ } @Override + public void reflectWith(AttributeReflector reflector) { + reflector.reflect(CharTermAttribute.class, "term", toString()); + final BytesRef bytes = new BytesRef(); + toBytesRef(bytes); + reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes); + } + + @Override public void copyTo(AttributeImpl target) { CharTermAttribute t = (CharTermAttribute) target; t.copyBuffer(termBuffer, 0, termLength); Index: lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java =================================================================== --- 
lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java (revision 1060792) +++ lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java (working copy) @@ -37,20 +37,6 @@ public void clear() { boost = 1.0f; } - - @Override - public boolean equals(Object other) { - if (this == other) - return true; - if (other instanceof BoostAttributeImpl) - return ((BoostAttributeImpl) other).boost == boost; - return false; - } - - @Override - public int hashCode() { - return Float.floatToIntBits(boost); - } @Override public void copyTo(AttributeImpl target) { Index: lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java (revision 1060792) +++ lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java (working copy) @@ -48,25 +48,6 @@ maxNonCompetitiveBoost = Float.NEGATIVE_INFINITY; competitiveTerm = null; } - - @Override - public boolean equals(Object other) { - if (this == other) - return true; - if (other instanceof MaxNonCompetitiveBoostAttributeImpl) { - final MaxNonCompetitiveBoostAttributeImpl o = (MaxNonCompetitiveBoostAttributeImpl) other; - return (o.maxNonCompetitiveBoost == maxNonCompetitiveBoost) - && (o.competitiveTerm == null ? 
competitiveTerm == null : o.competitiveTerm.equals(competitiveTerm)); - } - return false; - } - - @Override - public int hashCode() { - int hash = Float.floatToIntBits(maxNonCompetitiveBoost); - if (competitiveTerm != null) hash = 31 * hash + competitiveTerm.hashCode(); - return hash; - } @Override public void copyTo(AttributeImpl target) { Index: lucene/src/java/org/apache/lucene/util/AttributeImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/util/AttributeImpl.java (revision 1060792) +++ lucene/src/java/org/apache/lucene/util/AttributeImpl.java (working copy) @@ -20,6 +20,8 @@ import java.io.Serializable; import java.lang.reflect.Field; import java.lang.reflect.Modifier; +import java.lang.ref.WeakReference; +import java.util.LinkedList; /** * Base class for Attributes that can be added to a @@ -37,72 +39,80 @@ public abstract void clear(); /** - * The default implementation of this method accesses all declared - * fields of this object and prints the values in the following syntax: + * This method returns the current attribute values as a string in the following format + * by calling the {@link #reflectWith(AttributeReflector)} method: * + * + * + * @see #reflectWith(AttributeReflector) + */ + public final String reflectAsString(final boolean prependAttClass) { + final StringBuilder buffer = new StringBuilder(); + reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + if (buffer.length() > 0) { + buffer.append(','); + } + if (prependAttClass) { + buffer.append(attClass.getName()).append('#'); + } + buffer.append(key).append('=').append((value == null) ? "null" : value); + } + }); + return buffer.toString(); + } + + /** + * This method is for introspection of attributes, it should simply + * add the key/values this attribute holds to the given {@link AttributeReflector}. + * + *

The default implementation calls {@link AttributeReflector#reflect} for all + * non-static fields from the implementing class, using the field name as key + * and the field value as value. The Attribute class is also determined by reflection. + * Please note that the default implementation can only handle single-Attribute + * implementations. + * + *

Custom implementations look like this (e.g. for a combined attribute implementation): *

-   *   public String toString() {
-   *     return "start=" + startOffset + ",end=" + endOffset;
+   *   public void reflectWith(AttributeReflector reflector) {
+   *     reflector.reflect(CharTermAttribute.class, "term", term());
+   *     reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", getPositionIncrement());
    *   }
    * 
- * - * This method may be overridden by subclasses. + * + *

If you implement this method, make sure that for each invocation, the same set of {@link Attribute} + * interfaces and keys are passed to {@link AttributeReflector#reflect} in the same order, but possibly + * different values. So don't automatically exclude e.g. {@code null} properties! + * + * @see #reflectAsString(boolean) */ - @Override - public String toString() { - StringBuilder buffer = new StringBuilder(); - Class clazz = this.getClass(); - Field[] fields = clazz.getDeclaredFields(); + public void reflectWith(AttributeReflector reflector) { + final Class clazz = this.getClass(); + final LinkedList>> interfaces = AttributeSource.getAttributeInterfaces(clazz); + if (interfaces.size() != 1) { + throw new UnsupportedOperationException(clazz.getName() + + " implements more than one Attribute interface, the default reflectWith() implementation cannot handle this."); + } + final Class interf = interfaces.getFirst().get(); + final Field[] fields = clazz.getDeclaredFields(); try { for (int i = 0; i < fields.length; i++) { - Field f = fields[i]; + final Field f = fields[i]; if (Modifier.isStatic(f.getModifiers())) continue; f.setAccessible(true); - Object value = f.get(this); - if (buffer.length()>0) { - buffer.append(','); - } - if (value == null) { - buffer.append(f.getName() + "=null"); - } else { - buffer.append(f.getName() + "=" + value); - } + reflector.reflect(interf, f.getName(), f.get(this)); } } catch (IllegalAccessException e) { // this should never happen, because we're just accessing fields // from 'this' throw new RuntimeException(e); } - - return buffer.toString(); } /** - * Subclasses must implement this method and should compute - * a hashCode similar to this: - *

-   *   public int hashCode() {
-   *     int code = startOffset;
-   *     code = code * 31 + endOffset;
-   *     return code;
-   *   }
-   * 
- * - * see also {@link #equals(Object)} - */ - @Override - public abstract int hashCode(); - - /** - * All values used for computation of {@link #hashCode()} - * should be checked here for equality. - * - * see also {@link Object#equals(Object)} - */ - @Override - public abstract boolean equals(Object other); - - /** * Copies the values from this Attribute into the passed-in * target attribute. The target implementation must support all the * Attributes this implementation supports. Index: lucene/src/java/org/apache/lucene/util/AttributeSource.java =================================================================== --- lucene/src/java/org/apache/lucene/util/AttributeSource.java (revision 1060792) +++ lucene/src/java/org/apache/lucene/util/AttributeSource.java (working copy) @@ -180,20 +180,9 @@ private static final WeakHashMap,LinkedList>>> knownImplClasses = new WeakHashMap,LinkedList>>>(); - /** Expert: Adds a custom AttributeImpl instance with one or more Attribute interfaces. - *

Please note: It is not guaranteed, that att is added to - * the AttributeSource, because the provided attributes may already exist. - * You should always retrieve the wanted attributes using {@link #getAttribute} after adding - * with this method and cast to your class. - * The recommended way to use custom implementations is using an {@link AttributeFactory}. - *

- */ - public void addAttributeImpl(final AttributeImpl att) { - final Class clazz = att.getClass(); - if (attributeImpls.containsKey(clazz)) return; - LinkedList>> foundInterfaces; + static LinkedList>> getAttributeInterfaces(final Class clazz) { synchronized(knownImplClasses) { - foundInterfaces = knownImplClasses.get(clazz); + LinkedList>> foundInterfaces = knownImplClasses.get(clazz); if (foundInterfaces == null) { // we have a strong reference to the class instance holding all interfaces in the list (parameter "att"), // so all WeakReferences are never evicted by GC @@ -210,7 +199,23 @@ actClazz = actClazz.getSuperclass(); } while (actClazz != null); } + return foundInterfaces; } + } + + /** Expert: Adds a custom AttributeImpl instance with one or more Attribute interfaces. + *

Please note: It is not guaranteed, that att is added to + * the AttributeSource, because the provided attributes may already exist. + * You should always retrieve the wanted attributes using {@link #getAttribute} after adding + * with this method and cast to your class. + * The recommended way to use custom implementations is using an {@link AttributeFactory}. + *

+ */ + public final void addAttributeImpl(final AttributeImpl att) { + final Class clazz = att.getClass(); + if (attributeImpls.containsKey(clazz)) return; + final LinkedList>> foundInterfaces = + getAttributeInterfaces(clazz); // add all interfaces of this AttributeImpl to the maps for (WeakReference> curInterfaceRef : foundInterfaces) { @@ -233,7 +238,7 @@ * already in this AttributeSource and returns it. Otherwise a * new instance is created, added to this AttributeSource and returned. */ - public A addAttribute(Class attClass) { + public final A addAttribute(Class attClass) { AttributeImpl attImpl = attributes.get(attClass); if (attImpl == null) { if (!(attClass.isInterface() && Attribute.class.isAssignableFrom(attClass))) { @@ -248,7 +253,7 @@ } /** Returns true, iff this AttributeSource has any attributes */ - public boolean hasAttributes() { + public final boolean hasAttributes() { return !this.attributes.isEmpty(); } @@ -256,7 +261,7 @@ * The caller must pass in a Class<? extends Attribute> value. * Returns true, iff this AttributeSource contains the passed-in Attribute. */ - public boolean hasAttribute(Class attClass) { + public final boolean hasAttribute(Class attClass) { return this.attributes.containsKey(attClass); } @@ -271,7 +276,7 @@ * available. If you want to only use the attribute, if it is available (to optimize * consuming), use {@link #hasAttribute}. */ - public A getAttribute(Class attClass) { + public final A getAttribute(Class attClass) { AttributeImpl attImpl = attributes.get(attClass); if (attImpl == null) { throw new IllegalArgumentException("This AttributeSource does not have the attribute '" + attClass.getName() + "'."); @@ -319,7 +324,7 @@ * Resets all Attributes in this AttributeSource by calling * {@link AttributeImpl#clear()} on each Attribute implementation. 
*/ - public void clearAttributes() { + public final void clearAttributes() { if (hasAttributes()) { if (currentState == null) { computeCurrentState(); @@ -334,7 +339,7 @@ * Captures the state of all Attributes. The return value can be passed to * {@link #restoreState} to restore the state of this or another AttributeSource. */ - public State captureState() { + public final State captureState() { if (!hasAttributes()) { return null; } @@ -360,7 +365,7 @@ * reset its value to the default, in which case the caller should first * call {@link TokenStream#clearAttributes()} on the targetStream. */ - public void restoreState(State state) { + public final void restoreState(State state) { if (state == null) return; do { @@ -431,21 +436,53 @@ return false; } - @Override - public String toString() { - StringBuilder sb = new StringBuilder().append('('); + /** + * This method returns the current attribute values as a string in the following format + * by calling the {@link #reflectWith(AttributeReflector)} method: + * + *
    + *
  • iff {@code prependAttClass=true}: {@code "AttributeClass#key=value,AttributeClass#key=value"} + *
  • iff {@code prependAttClass=false}: {@code "key=value,key=value"} + *
+ * + * @see #reflectWith(AttributeReflector) + */ + public final String reflectAsString(final boolean prependAttClass) { + final StringBuilder buffer = new StringBuilder(); + reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + if (buffer.length() > 0) { + buffer.append(','); + } + if (prependAttClass) { + buffer.append(attClass.getName()).append('#'); + } + buffer.append(key).append('=').append((value == null) ? "null" : value); + } + }); + return buffer.toString(); + } + + /** + * This method is for introspection of attributes, it should simply + * add the key/values this AttributeSource holds to the given {@link AttributeReflector}. + * + *

This method iterates over all Attribute implementations and calls the + * corresponding {@link AttributeImpl#reflectWith} method.

+ * + * @see AttributeImpl#reflectWith + */ + public final void reflectWith(AttributeReflector reflector) { if (hasAttributes()) { if (currentState == null) { computeCurrentState(); } for (State state = currentState; state != null; state = state.next) { - if (state != currentState) sb.append(','); - sb.append(state.attribute.toString()); + state.attribute.reflectWith(reflector); } } - return sb.append(')').toString(); } - + /** * Performs a clone of all {@link AttributeImpl} instances returned in a new * {@code AttributeSource} instance. This method can be used to e.g. create another TokenStream @@ -453,7 +490,7 @@ * You can also use it as a (non-performant) replacement for {@link #captureState}, if you need to look * into / modify the captured state. */ - public AttributeSource cloneAttributes() { + public final AttributeSource cloneAttributes() { final AttributeSource clone = new AttributeSource(this.factory); if (hasAttributes()) { Index: lucene/src/test/org/apache/lucene/analysis/TestToken.java =================================================================== --- lucene/src/test/org/apache/lucene/analysis/TestToken.java (revision 1060792) +++ lucene/src/test/org/apache/lucene/analysis/TestToken.java (working copy) @@ -22,8 +22,11 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util._TestUtil; import java.io.StringReader; +import java.util.HashMap; public class TestToken extends LuceneTestCase { @@ -241,6 +244,22 @@ ts.addAttribute(TypeAttribute.class) instanceof Token); } + public void testAttributeReflection() throws Exception { + Token t = new Token("foobar", 6, 22, 8); + _TestUtil.assertAttributeReflection(t, + new HashMap() {{ + put(CharTermAttribute.class.getName() + "#term", "foobar"); + put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar")); + 
put(OffsetAttribute.class.getName() + "#startOffset", 6); + put(OffsetAttribute.class.getName() + "#endOffset", 22); + put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 1); + put(PayloadAttribute.class.getName() + "#payload", null); + put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE); + put(FlagsAttribute.class.getName() + "#flags", 8); + }}); + } + + public static T assertCloneIsEqual(T att) { @SuppressWarnings("unchecked") T clone = (T) att.clone(); Index: lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java =================================================================== --- lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java (revision 1060792) +++ lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java (working copy) @@ -19,7 +19,10 @@ import org.apache.lucene.analysis.TestToken; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util._TestUtil; import java.nio.CharBuffer; +import java.util.HashMap; import java.util.Formatter; import java.util.Locale; import java.util.regex.Pattern; @@ -126,6 +129,15 @@ assertNotSame(buf, copy.buffer()); } + public void testAttributeReflection() throws Exception { + CharTermAttributeImpl t = new CharTermAttributeImpl(); + t.append("foobar"); + _TestUtil.assertAttributeReflection(t, new HashMap() {{ + put(CharTermAttribute.class.getName() + "#term", "foobar"); + put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar")); + }}); + } + public void testCharSequenceInterface() { final String s = "0123456789"; final CharTermAttributeImpl t = new CharTermAttributeImpl(); Index: lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java =================================================================== --- 
lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java (revision 0) +++ lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java (revision 0) @@ -0,0 +1,46 @@ +package org.apache.lucene.analysis.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.LuceneTestCase; + +import java.util.Collections; +import java.util.HashMap; + +public class TestSimpleAttributeImpl extends LuceneTestCase { + + // this checks using reflection API if the defaults are correct + public void testAttributes() { + _TestUtil.assertAttributeReflection(new PositionIncrementAttributeImpl(), + Collections.singletonMap(PositionIncrementAttribute.class.getName()+"#positionIncrement", 1)); + _TestUtil.assertAttributeReflection(new FlagsAttributeImpl(), + Collections.singletonMap(FlagsAttribute.class.getName()+"#flags", 0)); + _TestUtil.assertAttributeReflection(new TypeAttributeImpl(), + Collections.singletonMap(TypeAttribute.class.getName()+"#type", TypeAttribute.DEFAULT_TYPE)); + _TestUtil.assertAttributeReflection(new PayloadAttributeImpl(), + Collections.singletonMap(PayloadAttribute.class.getName()+"#payload", null)); + _TestUtil.assertAttributeReflection(new KeywordAttributeImpl(), + Collections.singletonMap(KeywordAttribute.class.getName()+"#keyword", false)); + _TestUtil.assertAttributeReflection(new OffsetAttributeImpl(), new HashMap() {{ + put(OffsetAttribute.class.getName()+"#startOffset", 0); + put(OffsetAttribute.class.getName()+"#endOffset", 0); + }}); + } + +} Property changes on: lucene\src\test\org\apache\lucene\analysis\tokenattributes\TestSimpleAttributeImpl.java ___________________________________________________________________ Added: svn:keywords + Date Author Id Revision HeadURL Added: svn:eol-style + native Index: lucene/src/test/org/apache/lucene/util/_TestUtil.java =================================================================== --- lucene/src/test/org/apache/lucene/util/_TestUtil.java (revision 1060792) +++ lucene/src/test/org/apache/lucene/util/_TestUtil.java (working copy) @@ -22,7 +22,11 @@ import java.io.IOException; import java.io.PrintStream; import java.util.Random; +import java.util.Map; +import java.util.HashMap; +import 
org.junit.Assert; + import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.IndexWriter; @@ -238,4 +242,17 @@ ((ConcurrentMergeScheduler) ms).setMaxMergeCount(3); } } + + /** Checks some basic behaviour of an AttributeImpl + * @param reflectedValues contains a map with "AttributeClass#key" as values + */ + public static void assertAttributeReflection(final AttributeImpl att, Map reflectedValues) { + final Map map = new HashMap(); + att.reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + map.put(attClass.getName() + '#' + key, value); + } + }); + Assert.assertEquals("Reflection does not produce same map", reflectedValues, map); + } } Index: lucene/src/test/org/apache/lucene/util/TestAttributeSource.java =================================================================== --- lucene/src/test/org/apache/lucene/util/TestAttributeSource.java (revision 1060792) +++ lucene/src/test/org/apache/lucene/util/TestAttributeSource.java (working copy) @@ -109,34 +109,6 @@ assertEquals("TypeAttribute of original and clone must be equal", typeAtt2, typeAtt); } - public void testToStringAndMultiAttributeImplementations() { - AttributeSource src = new AttributeSource(); - CharTermAttribute termAtt = src.addAttribute(CharTermAttribute.class); - TypeAttribute typeAtt = src.addAttribute(TypeAttribute.class); - termAtt.append("TestTerm"); - typeAtt.setType("TestType"); - assertEquals("Attributes should appear in original order", "("+termAtt.toString()+","+typeAtt.toString()+")", src.toString()); - Iterator it = src.getAttributeImplsIterator(); - assertTrue("Iterator should have 2 attributes left", it.hasNext()); - assertSame("First AttributeImpl from iterator should be termAtt", termAtt, it.next()); - assertTrue("Iterator should have 1 attributes left", it.hasNext()); - assertSame("Second AttributeImpl from iterator should be typeAtt", typeAtt, it.next()); - 
assertFalse("Iterator should have 0 attributes left", it.hasNext()); - - src = new AttributeSource(); - src.addAttributeImpl(new Token()); - // this should not add a new attribute as Token implements CharTermAttribute, too - termAtt = src.addAttribute(CharTermAttribute.class); - assertTrue("CharTermAttribute should be implemented by Token", termAtt instanceof Token); - // get the Token attribute and check, that it is the only one - it = src.getAttributeImplsIterator(); - Token tok = (Token) it.next(); - assertFalse("There should be only one attribute implementation instance", it.hasNext()); - - termAtt.setEmpty().append("TestTerm"); - assertEquals("Token should only printed once", "("+tok.toString()+")", src.toString()); - } - public void testDefaultAttributeFactory() throws Exception { AttributeSource src = new AttributeSource(); Index: modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java =================================================================== --- modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java (revision 1060792) +++ modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java (working copy) @@ -20,6 +20,7 @@ import java.io.Serializable; import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeReflector; import com.ibm.icu.lang.UScript; @@ -77,7 +78,7 @@ } @Override - public String toString() { - return "script=" + getName(); + public void reflectWith(AttributeReflector reflector) { + reflector.reflect(ScriptAttribute.class, "script", getName()); } } Property changes on: solr ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/branch_3x/solr:r1060784 Index: solr/common-build.xml =================================================================== --- solr/common-build.xml (revision 1060792) +++ solr/common-build.xml 
(working copy) @@ -270,7 +270,7 @@ includeAntRuntime="${javac.includeAntRuntime}" sourcepath="" classpathref="@{classpathref}"> - + Index: solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java =================================================================== --- solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java (revision 1060792) +++ solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java (working copy) @@ -1,243 +0,0 @@ -package org.apache.solr.handler; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.commons.io.IOUtils; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.*; -import org.apache.lucene.util.BytesRef; -import org.apache.solr.common.SolrException; -import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.ContentStream; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.common.util.SimpleOrderedMap; -import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.response.SolrQueryResponse; -import org.apache.solr.schema.FieldType; -import org.apache.solr.schema.IndexSchema; -import org.apache.solr.schema.SchemaField; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.xml.stream.XMLInputFactory; -import javax.xml.stream.XMLStreamConstants; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamReader; -import java.io.IOException; -import java.io.Reader; -import java.io.StringReader; -import java.util.Collection; - -/** - * - * @deprecated Use {@link org.apache.solr.handler.DocumentAnalysisRequestHandler} instead. - **/ -@Deprecated -public class AnalysisRequestHandler extends RequestHandlerBase { - - public static Logger log = LoggerFactory.getLogger(AnalysisRequestHandler.class); - - private XMLInputFactory inputFactory; - - @Override - public void init(NamedList args) { - super.init(args); - - inputFactory = XMLInputFactory.newInstance(); - try { - // The java 1.6 bundled stax parser (sjsxp) does not currently have a thread-safe - // XMLInputFactory, as that implementation tries to cache and reuse the - // XMLStreamReader. Setting the parser-specific "reuse-instance" property to false - // prevents this. - // All other known open-source stax parsers (and the bea ref impl) - // have thread-safe factories. 
- inputFactory.setProperty("reuse-instance", Boolean.FALSE); - } - catch (IllegalArgumentException ex) { - // Other implementations will likely throw this exception since "reuse-instance" - // isimplementation specific. - log.debug("Unable to set the 'reuse-instance' property for the input factory: " + inputFactory); - } - } - - public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception { - SolrParams params = req.getParams(); - Iterable streams = req.getContentStreams(); - if (streams != null) { - for (ContentStream stream : req.getContentStreams()) { - Reader reader = stream.getReader(); - try { - XMLStreamReader parser = inputFactory.createXMLStreamReader(reader); - NamedList result = processContent(parser, req.getSchema()); - rsp.add("response", result); - } - finally { - IOUtils.closeQuietly(reader); - } - } - } - } - - NamedList processContent(XMLStreamReader parser, - IndexSchema schema) throws XMLStreamException, IOException { - NamedList result = new SimpleOrderedMap(); - while (true) { - int event = parser.next(); - switch (event) { - case XMLStreamConstants.END_DOCUMENT: { - parser.close(); - return result; - } - case XMLStreamConstants.START_ELEMENT: { - String currTag = parser.getLocalName(); - if ("doc".equals(currTag)) { - log.trace("Tokenizing doc..."); - - SolrInputDocument doc = readDoc(parser); - SchemaField uniq = schema.getUniqueKeyField(); - NamedList>> theTokens = new SimpleOrderedMap>>(); - result.add(doc.getFieldValue(uniq.getName()).toString(), theTokens); - for (String name : doc.getFieldNames()) { - FieldType ft = schema.getFieldType(name); - Analyzer analyzer = ft.getAnalyzer(); - Collection vals = doc.getFieldValues(name); - for (Object val : vals) { - Reader reader = new StringReader(val.toString()); - TokenStream tstream = analyzer.tokenStream(name, reader); - NamedList> tokens = getTokens(tstream); - theTokens.add(name, tokens); - } - } - } - break; - } - } - } - } - - static NamedList> 
getTokens(TokenStream tstream) throws IOException { - // outer is namedList since order of tokens is important - NamedList> tokens = new NamedList>(); - // TODO: support custom attributes - CharTermAttribute termAtt = null; - TermToBytesRefAttribute bytesAtt = null; - if (tstream.hasAttribute(CharTermAttribute.class)) { - termAtt = tstream.getAttribute(CharTermAttribute.class); - } else if (tstream.hasAttribute(TermToBytesRefAttribute.class)) { - bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class); - } - final OffsetAttribute offsetAtt = tstream.addAttribute(OffsetAttribute.class); - final TypeAttribute typeAtt = tstream.addAttribute(TypeAttribute.class); - final PositionIncrementAttribute posIncAtt = tstream.addAttribute(PositionIncrementAttribute.class); - - final BytesRef bytes = new BytesRef(); - while (tstream.incrementToken()) { - NamedList token = new SimpleOrderedMap(); - tokens.add("token", token); - if (termAtt != null) { - token.add("value", termAtt.toString()); - } - if (bytesAtt != null) { - bytesAtt.toBytesRef(bytes); - // TODO: This is incorrect when numeric fields change in later lucene versions. It should use BytesRef directly! 
- token.add("value", bytes.utf8ToString()); - } - token.add("start", offsetAtt.startOffset()); - token.add("end", offsetAtt.endOffset()); - token.add("posInc", posIncAtt.getPositionIncrement()); - token.add("type", typeAtt.type()); - //TODO: handle payloads - } - return tokens; - } - - SolrInputDocument readDoc(XMLStreamReader parser) throws XMLStreamException { - SolrInputDocument doc = new SolrInputDocument(); - - StringBuilder text = new StringBuilder(); - String name = null; - String attrName = ""; - float boost = 1.0f; - boolean isNull = false; - while (true) { - int event = parser.next(); - switch (event) { - // Add everything to the text - case XMLStreamConstants.SPACE: - case XMLStreamConstants.CDATA: - case XMLStreamConstants.CHARACTERS: - text.append(parser.getText()); - break; - - case XMLStreamConstants.END_ELEMENT: - if ("doc".equals(parser.getLocalName())) { - return doc; - } else if ("field".equals(parser.getLocalName())) { - if (!isNull) { - doc.addField(name, text.toString(), boost); - boost = 1.0f; - } - } - break; - - case XMLStreamConstants.START_ELEMENT: - text.setLength(0); - String localName = parser.getLocalName(); - if (!"field".equals(localName)) { - log.warn("unexpected XML tag doc/" + localName); - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, - "unexpected XML tag doc/" + localName); - } - - String attrVal = ""; - for (int i = 0; i < parser.getAttributeCount(); i++) { - attrName = parser.getAttributeLocalName(i); - attrVal = parser.getAttributeValue(i); - if ("name".equals(attrName)) { - name = attrVal; - } - } - break; - } - } - } - - - //////////////////////// SolrInfoMBeans methods ////////////////////// - @Override - public String getDescription() { - return "Provide Analysis of text"; - } - - @Override - public String getVersion() { - return "$Revision$"; - } - - @Override - public String getSourceId() { - return "$Id$"; - } - - @Override - public String getSource() { - return "$URL$"; - } - -} Index: 
solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java =================================================================== --- solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java (revision 1060792) +++ solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java (working copy) @@ -20,10 +20,14 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CharReader; import org.apache.lucene.analysis.CharStream; -import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.*; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.Payload; +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.AttributeReflector; +import org.apache.lucene.util.SorterTemplate; import org.apache.solr.analysis.CharFilterFactory; import org.apache.solr.analysis.TokenFilterFactory; import org.apache.solr.analysis.TokenizerChain; @@ -34,10 +38,13 @@ import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.schema.FieldType; +import org.apache.solr.util.ByteUtils; +import org.apache.noggit.CharArr; import java.io.IOException; import java.io.StringReader; import java.util.*; +import java.math.BigInteger; /** * A base class for all analysis request handlers. 
@@ -47,7 +54,7 @@ */ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { - public static final Set EMPTY_STRING_SET = Collections.emptySet(); + public static final Set EMPTY_BYTES_SET = Collections.emptySet(); public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception { rsp.add("analysis", doAnalysis(req)); @@ -107,7 +114,7 @@ } TokenStream tokenStream = tfac.create(tokenizerChain.charStream(new StringReader(value))); - List tokens = analyzeTokenStream(tokenStream); + List tokens = analyzeTokenStream(tokenStream); namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context)); @@ -115,7 +122,7 @@ for (TokenFilterFactory tokenFilterFactory : filtfacs) { tokenStream = tokenFilterFactory.create(listBasedTokenStream); - List tokenList = analyzeTokenStream(tokenStream); + List tokenList = analyzeTokenStream(tokenStream); namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokenList, context)); listBasedTokenStream = new ListBasedTokenStream(tokenList); } @@ -126,14 +133,24 @@ /** * Analyzes the given text using the given analyzer and returns the produced tokens. * - * @param value The value to analyze. + * @param query The query to analyze. * @param analyzer The analyzer to use. - * - * @return The produces token list. 
*/ - protected List analyzeValue(String value, Analyzer analyzer) { - TokenStream tokenStream = analyzer.tokenStream("", new StringReader(value)); - return analyzeTokenStream(tokenStream); + protected Set getQueryTokenSet(String query, Analyzer analyzer) { + final Set tokens = new HashSet(); + final TokenStream tokenStream = analyzer.tokenStream("", new StringReader(query)); + final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class); + try { + tokenStream.reset(); + while (tokenStream.incrementToken()) { + final BytesRef bytes = new BytesRef(); + bytesAtt.toBytesRef(bytes); + tokens.add(bytes); + } + } catch (IOException ioe) { + throw new RuntimeException("Error occured while iterating over tokenstream", ioe); + } + return tokens; } /** @@ -143,41 +160,17 @@ * * @return List of tokens produced from the TokenStream */ - private List analyzeTokenStream(TokenStream tokenStream) { - List tokens = new ArrayList(); - - // TODO change this API to support custom attributes - CharTermAttribute termAtt = null; - TermToBytesRefAttribute bytesAtt = null; - if (tokenStream.hasAttribute(CharTermAttribute.class)) { - termAtt = tokenStream.getAttribute(CharTermAttribute.class); - } else if (tokenStream.hasAttribute(TermToBytesRefAttribute.class)) { - bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class); - } - final OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class); - final TypeAttribute typeAtt = tokenStream.addAttribute(TypeAttribute.class); - final PositionIncrementAttribute posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class); - final FlagsAttribute flagsAtt = tokenStream.addAttribute(FlagsAttribute.class); - final PayloadAttribute payloadAtt = tokenStream.addAttribute(PayloadAttribute.class); - + private List analyzeTokenStream(TokenStream tokenStream) { + List tokens = new ArrayList(); + // for backwards compatibility, add all "common" attributes + 
tokenStream.addAttribute(PositionIncrementAttribute.class); + tokenStream.addAttribute(OffsetAttribute.class); + tokenStream.addAttribute(TypeAttribute.class); final BytesRef bytes = new BytesRef(); try { + tokenStream.reset(); while (tokenStream.incrementToken()) { - Token token = new Token(); - if (termAtt != null) { - token.setEmpty().append(termAtt); - } - if (bytesAtt != null) { - bytesAtt.toBytesRef(bytes); - // TODO: This is incorrect when numeric fields change in later lucene versions. It should use BytesRef directly! - token.setEmpty().append(bytes.utf8ToString()); - } - token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); - token.setType(typeAtt.type()); - token.setFlags(flagsAtt.getFlags()); - token.setPayload(payloadAtt.getPayload()); - token.setPositionIncrement(posIncAtt.getPositionIncrement()); - tokens.add((Token) token.clone()); + tokens.add(tokenStream.cloneAttributes()); } } catch (IOException ioe) { throw new RuntimeException("Error occured while iterating over tokenstream", ioe); @@ -186,6 +179,14 @@ return tokens; } + // a static mapping of the reflected attribute keys to the names used in Solr 1.4 + static Map ATTRIBUTE_MAPPING = Collections.unmodifiableMap(new HashMap() {{ + put(CharTermAttribute.class.getName() + "#term", "raw_text"); + put(OffsetAttribute.class.getName() + "#startOffset", "start"); + put(OffsetAttribute.class.getName() + "#endOffset", "end"); + put(TypeAttribute.class.getName() + "#type", "type"); + }}); + /** * Converts the list of Tokens to a list of NamedLists representing the tokens. 
* @@ -194,42 +195,88 @@ * * @return List of NamedLists containing the relevant information taken from the tokens */ - private List convertTokensToNamedLists(List tokens, AnalysisContext context) { - List tokensNamedLists = new ArrayList(); + private List convertTokensToNamedLists(final List tokens, AnalysisContext context) { + final List tokensNamedLists = new ArrayList(); - Collections.sort(tokens, new Comparator() { - public int compare(Token o1, Token o2) { - return o1.endOffset() - o2.endOffset(); + final int[] positions = new int[tokens.size()]; + int position = 0; + for (int i = 0, c = tokens.size(); i < c; i++) { + AttributeSource token = tokens.get(i); + position += token.addAttribute(PositionIncrementAttribute.class).getPositionIncrement(); + positions[i] = position; + } + + // sort the tokens by absolute position + new SorterTemplate() { + @Override + protected void swap(int i, int j) { + Collections.swap(tokens, i, j); } - }); + + @Override + protected int compare(int i, int j) { + return positions[i] - positions[j]; + } - int position = 0; + @Override + protected void setPivot(int i) { + pivot = positions[i]; + } + + @Override + protected int comparePivot(int j) { + return pivot - positions[j]; + } + + private int pivot; + }.mergeSort(0, tokens.size() - 1); FieldType fieldType = context.getFieldType(); - for (Token token : tokens) { - NamedList tokenNamedList = new SimpleOrderedMap(); + for (int i = 0, c = tokens.size(); i < c; i++) { + AttributeSource token = tokens.get(i); + final NamedList tokenNamedList = new SimpleOrderedMap(); + final BytesRef rawBytes = new BytesRef(); + token.getAttribute(TermToBytesRefAttribute.class).toBytesRef(rawBytes); - String text = fieldType.indexedToReadable(token.toString()); - tokenNamedList.add("text", text); - if (!text.equals(token.toString())) { - tokenNamedList.add("raw_text", token.toString()); - } - tokenNamedList.add("type", token.type()); - tokenNamedList.add("start", token.startOffset()); - 
tokenNamedList.add("end", token.endOffset()); + CharArr text = new CharArr(rawBytes.length); + fieldType.indexedToReadable(rawBytes, text); - position += token.getPositionIncrement(); - tokenNamedList.add("position", position); + tokenNamedList.add("text", text.toString()); + tokenNamedList.add("raw_bytes", rawBytes.toString()); - if (context.getTermsToMatch().contains(token.toString())) { + if (context.getTermsToMatch().contains(rawBytes)) { tokenNamedList.add("match", true); } - if (token.getPayload() != null) { - tokenNamedList.add("payload", token.getPayload()); - } + tokenNamedList.add("position", positions[i]); + token.reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + // leave out position and bytes term + if (TermToBytesRefAttribute.class.isAssignableFrom(attClass)) + return; + if (PositionIncrementAttribute.class.isAssignableFrom(attClass)) + return; + + String k = attClass.getName() + '#' + key; + + // map keys for "standard attributes": + if (ATTRIBUTE_MAPPING.containsKey(k)) { + k = ATTRIBUTE_MAPPING.get(k); + } + + if (value instanceof Payload) { + final Payload p = (Payload) value; + value = new BytesRef(p.getData()).toString(); + } else if (value instanceof BytesRef) { + value = value.toString(); + } + + tokenNamedList.add(k, value); + } + }); + tokensNamedLists.add(tokenNamedList); } @@ -261,38 +308,27 @@ */ // TODO refactor to support custom attributes protected final static class ListBasedTokenStream extends TokenStream { - private final List tokens; - private Iterator tokenIterator; + private final List tokens; + private Iterator tokenIterator; - private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); - private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); - private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class); - private final PayloadAttribute 
payloadAtt = addAttribute(PayloadAttribute.class); - private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); /** * Creates a new ListBasedTokenStream which uses the given tokens as its token source. * * @param tokens Source of tokens to be used */ - ListBasedTokenStream(List tokens) { + ListBasedTokenStream(List tokens) { this.tokens = tokens; tokenIterator = tokens.iterator(); } - /** - * {@inheritDoc} - */ @Override public boolean incrementToken() throws IOException { if (tokenIterator.hasNext()) { - Token next = tokenIterator.next(); - termAtt.copyBuffer(next.buffer(), 0, next.length()); - typeAtt.setType(next.type()); - offsetAtt.setOffset(next.startOffset(), next.endOffset()); - flagsAtt.setFlags(next.getFlags()); - payloadAtt.setPayload(next.getPayload()); - posIncAtt.setPositionIncrement(next.getPositionIncrement()); + AttributeSource next = tokenIterator.next(); + Iterator> atts = next.getAttributeClassesIterator(); + while (atts.hasNext()) // make sure all att impls in the token exist here + addAttribute(atts.next()); + next.copyTo(this); return true; } else { return false; @@ -314,7 +350,7 @@ private final String fieldName; private final FieldType fieldType; private final Analyzer analyzer; - private final Set termsToMatch; + private final Set termsToMatch; /** * Constructs a new AnalysisContext with a given field tpe, analyzer and @@ -328,7 +364,7 @@ * @param termsToMatch Holds all the terms that should match during the * analysis process. 
*/ - public AnalysisContext(FieldType fieldType, Analyzer analyzer, Set termsToMatch) { + public AnalysisContext(FieldType fieldType, Analyzer analyzer, Set termsToMatch) { this(null, fieldType, analyzer, termsToMatch); } @@ -343,7 +379,7 @@ * */ public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer) { - this(fieldName, fieldType, analyzer, EMPTY_STRING_SET); + this(fieldName, fieldType, analyzer, EMPTY_BYTES_SET); } /** @@ -359,7 +395,7 @@ * @param termsToMatch Holds all the terms that should match during the * analysis process. */ - public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer, Set termsToMatch) { + public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer, Set termsToMatch) { this.fieldName = fieldName; this.fieldType = fieldType; this.analyzer = analyzer; @@ -378,7 +414,7 @@ return analyzer; } - public Set getTermsToMatch() { + public Set getTermsToMatch() { return termsToMatch; } } Index: solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java =================================================================== --- solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java (revision 1060792) +++ solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java (working copy) @@ -19,7 +19,9 @@ import org.apache.commons.io.IOUtils; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.BytesRef; import org.apache.solr.client.solrj.request.DocumentAnalysisRequest; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; @@ -216,21 +218,20 @@ FieldType fieldType = schema.getFieldType(name); - Set termsToMatch = new HashSet(); - if (request.getQuery() != null && request.isShowMatch()) { - try { - List tokens = 
analyzeValue(request.getQuery(), fieldType.getQueryAnalyzer()); - for (Token token : tokens) { - termsToMatch.add(token.toString()); - } - } catch (Exception e) { - // ignore analysis exceptions since we are applying arbitrary text to all fields - } + final String queryValue = request.getQuery(); + Set termsToMatch; + try { + termsToMatch = (queryValue != null && request.isShowMatch()) + ? getQueryTokenSet(queryValue, fieldType.getQueryAnalyzer()) + : EMPTY_BYTES_SET; + } catch (Exception e) { + // ignore analysis exceptions since we are applying arbitrary text to all fields + termsToMatch = EMPTY_BYTES_SET; } if (request.getQuery() != null) { try { - AnalysisContext analysisContext = new AnalysisContext(fieldType, fieldType.getQueryAnalyzer(), EMPTY_STRING_SET); + AnalysisContext analysisContext = new AnalysisContext(fieldType, fieldType.getQueryAnalyzer(), EMPTY_BYTES_SET); fieldTokens.add("query", analyzeValue(request.getQuery(), analysisContext)); } catch (Exception e) { // ignore analysis exceptions since we are applying arbitrary text to all fields Index: solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java =================================================================== --- solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java (revision 1060792) +++ solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java (working copy) @@ -17,7 +17,9 @@ package org.apache.solr.handler; -import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.BytesRef; import org.apache.solr.client.solrj.request.FieldAnalysisRequest; import org.apache.solr.common.params.AnalysisParams; import org.apache.solr.common.params.CommonParams; @@ -30,10 +32,7 @@ import org.apache.solr.schema.IndexSchema; import org.apache.commons.io.IOUtils; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; 
-import java.util.Set; +import java.util.*; import java.io.Reader; import java.io.IOException; @@ -222,14 +221,10 @@ */ private NamedList analyzeValues(FieldAnalysisRequest analysisRequest, FieldType fieldType, String fieldName) { - Set termsToMatch = new HashSet(); - String queryValue = analysisRequest.getQuery(); - if (queryValue != null && analysisRequest.isShowMatch()) { - List tokens = analyzeValue(queryValue, fieldType.getQueryAnalyzer()); - for (Token token : tokens) { - termsToMatch.add(token.toString()); - } - } + final String queryValue = analysisRequest.getQuery(); + final Set termsToMatch = (queryValue != null && analysisRequest.isShowMatch()) + ? getQueryTokenSet(queryValue, fieldType.getQueryAnalyzer()) + : EMPTY_BYTES_SET; NamedList analyzeResults = new SimpleOrderedMap(); if (analysisRequest.getFieldValue() != null) { Index: solr/src/webapp/web/admin/analysis.jsp =================================================================== --- solr/src/webapp/web/admin/analysis.jsp (revision 1060792) +++ solr/src/webapp/web/admin/analysis.jsp (working copy) @@ -24,6 +24,7 @@ org.apache.lucene.analysis.CharReader, org.apache.lucene.analysis.CharStream, org.apache.lucene.analysis.tokenattributes.*, + org.apache.lucene.util.AttributeReflector, org.apache.solr.analysis.CharFilterFactory, org.apache.solr.analysis.TokenFilterFactory, org.apache.solr.analysis.TokenizerChain, @@ -31,7 +32,8 @@ org.apache.solr.schema.FieldType, org.apache.solr.schema.SchemaField, org.apache.solr.common.util.XML, - javax.servlet.jsp.JspWriter,java.io.IOException + javax.servlet.jsp.JspWriter,java.io.IOException, + org.apache.noggit.CharArr "%> <%@ page import="java.io.Reader"%> <%@ page import="java.io.StringReader"%> @@ -39,8 +41,6 @@ <%@ page import="java.math.BigInteger" %> <%-- $Id$ --%> -<%-- $Source: /cvs/main/searching/org.apache.solrolarServer/resources/admin/analysis.jsp,v $ --%> -<%-- $Name: $ --%> <%@include file="header.jsp" %> @@ -71,19 +71,19 @@ @@ -115,7 +115,7 @@ @@ 
-148,24 +148,28 @@ } if (field!=null) { - HashSet matches = null; + HashSet matches = null; if (qval!="" && highlight) { Reader reader = new StringReader(qval); Analyzer analyzer = field.getType().getQueryAnalyzer(); TokenStream tstream = analyzer.reusableTokenStream(field.getName(),reader); + TermToBytesRefAttribute bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class); tstream.reset(); - List tokens = getTokens(tstream); - matches = new HashSet(); - for (AttributeSource t : tokens) { matches.add( new Tok(t,0)); } + matches = new HashSet(); + while (tstream.incrementToken()) { + final BytesRef bytes = new BytesRef(); + bytesAtt.toBytesRef(bytes); + matches.add(bytes); + } } if (val!="") { out.println("

Index Analyzer

"); - doAnalyzer(out, field, val, false, verbose,matches); + doAnalyzer(out, field, val, false, verbose, matches); } if (qval!="") { out.println("

Query Analyzer

"); - doAnalyzer(out, field, qval, true, qverbose,null); + doAnalyzer(out, field, qval, true, qverbose, null); } } @@ -177,7 +181,7 @@ <%! - private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set match) throws Exception { + private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set match) throws Exception { FieldType ft = field.getType(); Analyzer analyzer = queryAnalyser ? @@ -240,7 +244,7 @@ tstream.reset(); List tokens = getTokens(tstream); if (verbose) { - writeHeader(out, analyzer.getClass(), new HashMap()); + writeHeader(out, analyzer.getClass(), Collections.EMPTY_MAP); } writeTokens(out, tokens, ft, verbose, match); } @@ -249,52 +253,53 @@ static List getTokens(TokenStream tstream) throws IOException { List tokens = new ArrayList(); - - while (true) { - if (!tstream.incrementToken()) - break; - else { - tokens.add(tstream.cloneAttributes()); - } + tstream.reset(); + while (tstream.incrementToken()) { + tokens.add(tstream.cloneAttributes()); } return tokens; } - + private static class ReflectItem { + final Class attClass; + final String key; + final Object value; + + ReflectItem(Class attClass, String key, Object value) { + this.attClass = attClass; + this.key = key; + this.value = value; + } + } + private static class Tok { - AttributeSource token; - int pos; + final BytesRef bytes = new BytesRef(); + final int pos; + final List reflected = new ArrayList(); + Tok(AttributeSource token, int pos) { - this.token=token; - this.pos=pos; + token.getAttribute(TermToBytesRefAttribute.class).toBytesRef(bytes); + this.pos = pos; + token.reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + // leave out position and raw term + if (TermToBytesRefAttribute.class.isAssignableFrom(attClass)) + return; + if (PositionIncrementAttribute.class.isAssignableFrom(attClass)) + return; + if 
("term".equals(key) && CharTermAttribute.class.isAssignableFrom(attClass)) + key = "rawTerm"; + reflected.add(new ReflectItem(attClass, key, value)); + } + }); } - - public boolean equals(Object o) { - return ((Tok)o).token.toString().equals(token.toString()); - } - public int hashCode() { - return token.toString().hashCode(); - } - public String toString() { - return token.toString(); - } - public String toPrintableString() { - TermToBytesRefAttribute att = token.addAttribute(TermToBytesRefAttribute.class); - if (att instanceof CharTermAttribute) - return att.toString(); - else { - BytesRef bytes = new BytesRef(); - att.toBytesRef(bytes); - return bytes.toString(); - } - } } - private static interface ToStr { - public String toStr(Object o); + private static interface TokToStr { + public String toStr(Tok o); } - private static void printRow(JspWriter out, String header, List[] arrLst, ToStr converter, boolean multival, boolean verbose, Set match) throws IOException { + private static void printRow(JspWriter out, String header, String headerTitle, List[] arrLst, TokToStr converter, boolean multival, boolean verbose, Set match) throws IOException { // find the maximum number of terms for any position int maxSz=1; if (multival) { @@ -308,7 +313,13 @@ out.println("
"); if (idx==0 && verbose) { if (header != null) { - out.print(""); } @@ -317,7 +328,7 @@ for (int posIndex=0; posIndex lst = arrLst[posIndex]; if (lst.size() <= idx) continue; - if (match!=null && match.contains(lst.get(idx))) { + if (match!=null && match.contains(lst.get(idx).bytes)) { out.print("
- Field + Field - +
- Field value (Index) + Field value (Index)
verbose output >
- +
- Field value (Query) + Field value (Query)
verbose output >
- +
- +
"); + out.print(""); XML.escapeCharData(header,out); out.println(" args) throws IOException { out.print("

"); out.print(clazz.getName()); @@ -359,19 +361,14 @@ // readable, raw, pos, type, start/end - static void writeTokens(JspWriter out, List tokens, final FieldType ft, boolean verbose, Set match) throws IOException { + static void writeTokens(JspWriter out, List tokens, final FieldType ft, boolean verbose, Set match) throws IOException { // Use a map to tell what tokens are in what positions // because some tokenizers/filters may do funky stuff with // very large increments, or negative increments. HashMap> map = new HashMap>(); - boolean needRaw=false; - int pos=0; + int pos=0, reflectionCount = -1; for (AttributeSource t : tokens) { - if (!t.toString().equals(ft.indexedToReadable(t.toString()))) { - needRaw=true; - } - pos += t.addAttribute(PositionIncrementAttribute.class).getPositionIncrement(); List lst = map.get(pos); if (lst==null) { @@ -379,117 +376,67 @@ map.put(pos,lst); } Tok tok = new Tok(t,pos); + // sanity check + if (reflectionCount < 0) { + reflectionCount = tok.reflected.size(); + } else { + if (reflectionCount != tok.reflected.size()) + throw new RuntimeException("Should not happen: Number of reflected entries differs for position=" + pos); + } lst.add(tok); } List[] arr = (List[])map.values().toArray(new ArrayList[map.size()]); - /* Jetty 6.1.3 miscompiles this generics version... 
- Arrays.sort(arr, new Comparator>() { - public int compare(List toks, List toks1) { - return toks.get(0).pos - toks1.get(0).pos; - } - } - */ - + // Jetty 6.1.3 miscompiles a generics-enabled version..., without generics: Arrays.sort(arr, new Comparator() { public int compare(Object toks, Object toks1) { return ((List)toks).get(0).pos - ((List)toks1).get(0).pos; } - } + }); - - ); - out.println(""); if (verbose) { - printRow(out,"term position", arr, new ToStr() { - public String toStr(Object o) { - return Integer.toString(((Tok)o).pos); + printRow(out, "position", "calculated from " + PositionIncrementAttribute.class.getName(), arr, new TokToStr() { + public String toStr(Tok t) { + return Integer.toString(t.pos); } - } - ,false - ,verbose - ,null); + },false,verbose,null); } - - printRow(out,"term text", arr, new ToStr() { - public String toStr(Object o) { - return ft.indexedToReadable( ((Tok)o).toPrintableString() ); + printRow(out, "term", "indexedToReadable applied to " + TermToBytesRefAttribute.class.getName(), arr, new TokToStr() { + public String toStr(Tok t) { + CharArr text = new CharArr(t.bytes.length); + ft.indexedToReadable(t.bytes, text); + return text.toString(); } - } - ,true - ,verbose - ,match - ); + },true,verbose,match); - if (needRaw) { - printRow(out,"raw text", arr, new ToStr() { - public String toStr(Object o) { - // page is UTF-8, so anything goes. 
- return ((Tok)o).toPrintableString(); - } - } - ,true - ,verbose - ,match - ); - } - if (verbose) { - printRow(out,"term type", arr, new ToStr() { - public String toStr(Object o) { - String tt = ((Tok)o).token.addAttribute(TypeAttribute.class).type(); - if (tt == null) { - return "null"; - } else { - return tt; - } + printRow(out, "rawBytes", TermToBytesRefAttribute.class.getName(), arr, new TokToStr() { + public String toStr(Tok t) { + return t.bytes.toString(); } - } - ,true - ,verbose, - null - ); - } + },true,verbose,match); - if (verbose) { - printRow(out,"source start,end", arr, new ToStr() { - public String toStr(Object o) { - AttributeSource t = ((Tok)o).token; - return Integer.toString(t.addAttribute(OffsetAttribute.class).startOffset()) + ',' + t.addAttribute(OffsetAttribute.class).endOffset() ; - } - } - ,true - ,verbose - ,null - ); - } - - if (verbose) { - printRow(out,"payload", arr, new ToStr() { - public String toStr(Object o) { - AttributeSource t = ((Tok)o).token; - Payload p = t.addAttribute(PayloadAttribute.class).getPayload(); - if( null != p ) { - BigInteger bi = new BigInteger( p.getData() ); - String ret = bi.toString( 16 ); - if (ret.length() % 2 != 0) { - // Pad with 0 - ret = "0"+ret; + for (int att=0; att < reflectionCount; att++) { + final ReflectItem item0 = arr[0].get(0).reflected.get(att); + final int i = att; + printRow(out, item0.key, item0.attClass.getName(), arr, new TokToStr() { + public String toStr(Tok t) { + final ReflectItem item = t.reflected.get(i); + if (item0.attClass != item.attClass || !item0.key.equals(item.key)) + throw new RuntimeException("Should not happen: attribute types suddenly change at position=" + t.pos); + if (item.value instanceof Payload) { + final Payload p = (Payload) item.value; + return new BytesRef(p.getData()).toString(); + } else { + return (item.value != null) ? 
item.value.toString() : ""; } - ret += isPayloadString( p ); - return ret; } - return ""; - } + },true,verbose, CharTermAttribute.class.isAssignableFrom(item0.attClass) ? match : null); } - ,true - ,verbose - ,null - ); } out.println("
");