Property changes on: .
___________________________________________________________________
Modified: svn:mergeinfo
Merged /lucene/dev/branches/branch_3x:r1060784
Property changes on: lucene
___________________________________________________________________
Modified: svn:mergeinfo
Merged /lucene/dev/branches/branch_3x/lucene:r1060784
Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt (revision 1060792)
+++ lucene/CHANGES.txt (working copy)
@@ -362,9 +362,9 @@
* LUCENE-2302: The new interface for term attributes, CharTermAttribute,
now implements CharSequence. This requires the toString() methods of
CharTermAttribute, deprecated TermAttribute, and Token to return only
- the term text and no other attribute contents.
- TODO: Point to new attribute inspection API coming with LUCENE-2374.
- (Uwe Schindler, Robert Muir)
+ the term text and no other attribute contents. LUCENE-2374 implements
+ an attribute reflection API to no longer rely on toString() for attribute
+ inspection. (Uwe Schindler, Robert Muir)
* LUCENE-2372, LUCENE-2389: StandardAnalyzer, KeywordAnalyzer,
PerFieldAnalyzerWrapper, WhitespaceTokenizer are now final. Also removed
@@ -592,6 +592,23 @@
to ensure that the norm is encoded with your Similarity.
(Robert Muir, Mike McCandless)
+* LUCENE-2374: Added Attribute reflection API: It's now possible to inspect the
+ contents of AttributeImpl and AttributeSource using a well-defined API.
+ This is e.g. used by Solr's AnalysisRequestHandlers to display all attributes
+ in a structured way.
+ There are also some backwards incompatible changes in toString() output,
+ as LUCENE-2302 introduced the CharSequence interface to CharTermAttribute
+ leading to changed toString() return values. The new API allows getting a
+ string representation in a well-defined way using a new method
+ reflectAsString(). For backwards compatibility reasons, when toString()
+ was implemented by implementation subclasses, the default implementation of
+ AttributeImpl.reflectWith() uses toString()'s output instead to report the
+ Attribute's properties. Otherwise, reflectWith() uses Java's reflection
+ (like toString() did before) to get the attribute properties.
+ In addition, implementing equals() and hashCode() is no longer required
+ for AttributeImpls, but can still be provided (if needed).
+ (Uwe Schindler)
+
Bug fixes
* LUCENE-2249: ParallelMultiSearcher should shut down thread pool on
Index: lucene/MIGRATE.txt
===================================================================
--- lucene/MIGRATE.txt (revision 1060792)
+++ lucene/MIGRATE.txt (working copy)
@@ -328,3 +328,9 @@
* LUCENE-2761: DataInput.readVInt/readVLong and DataOutput.writeVInt/writeVLong
are final. If you subclassed this code before to encode variable-length
integers in some specialized way, use the Codec API instead.
+
+* LUCENE-2374: The backwards layer in AttributeImpl was removed. To support correct
+ reflection of AttributeImpl instances, you have to override reflectWith() to
+ customize output. toString() is no longer implemented by AttributeImpl, so
+ if you have overridden toString(), port your customization over to reflectWith().
+ reflectAsString() would then return what toString() did before.
Index: lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java (revision 1060792)
+++ lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java (working copy)
@@ -19,6 +19,7 @@
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.document.NumericField; // for javadocs
@@ -154,18 +155,18 @@
public void clear() {
// this attribute has no contents to clear
}
-
+
@Override
- public boolean equals(Object other) {
- return other == this;
+ public void reflectWith(AttributeReflector reflector) {
+ final BytesRef bytes = new BytesRef();
+ toBytesRef(bytes);
+ reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes);
+ reflector.reflect(NumericTermAttribute.class, "shift", getShift());
+ reflector.reflect(NumericTermAttribute.class, "rawValue", getRawValue());
+ reflector.reflect(NumericTermAttribute.class, "valueSize", getValueSize());
}
-
+
@Override
- public int hashCode() {
- return System.identityHashCode(this);
- }
-
- @Override
public void copyTo(AttributeImpl target) {
// this attribute has no contents to copy
}
Index: lucene/src/java/org/apache/lucene/analysis/Token.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/Token.java (revision 1060792)
+++ lucene/src/java/org/apache/lucene/analysis/Token.java (working copy)
@@ -28,6 +28,7 @@
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
/**
A Token is an occurrence of a term from the text of a field. It consists of
@@ -588,6 +589,17 @@
}
}
+ @Override
+ public void reflectWith(AttributeReflector reflector) {
+ super.reflectWith(reflector);
+ reflector.reflect(OffsetAttribute.class, "startOffset", startOffset);
+ reflector.reflect(OffsetAttribute.class, "endOffset", endOffset);
+ reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement);
+ reflector.reflect(PayloadAttribute.class, "payload", payload);
+ reflector.reflect(FlagsAttribute.class, "flags", flags);
+ reflector.reflect(TypeAttribute.class, "type", type);
+ }
+
/** Convenience factory that returns Token as implementation for the basic
* attributes and return the default impl (with "Impl" appended) for all other
* attributes.
Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (revision 1060792)
+++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (working copy)
@@ -23,6 +23,7 @@
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.UnicodeUtil;
@@ -244,6 +245,14 @@
}
@Override
+ public void reflectWith(AttributeReflector reflector) {
+ reflector.reflect(CharTermAttribute.class, "term", toString());
+ final BytesRef bytes = new BytesRef();
+ toBytesRef(bytes);
+ reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes);
+ }
+
+ @Override
public void copyTo(AttributeImpl target) {
CharTermAttribute t = (CharTermAttribute) target;
t.copyBuffer(termBuffer, 0, termLength);
Index: lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java (revision 1060792)
+++ lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java (working copy)
@@ -37,20 +37,6 @@
public void clear() {
boost = 1.0f;
}
-
- @Override
- public boolean equals(Object other) {
- if (this == other)
- return true;
- if (other instanceof BoostAttributeImpl)
- return ((BoostAttributeImpl) other).boost == boost;
- return false;
- }
-
- @Override
- public int hashCode() {
- return Float.floatToIntBits(boost);
- }
@Override
public void copyTo(AttributeImpl target) {
Index: lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java (revision 1060792)
+++ lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java (working copy)
@@ -48,25 +48,6 @@
maxNonCompetitiveBoost = Float.NEGATIVE_INFINITY;
competitiveTerm = null;
}
-
- @Override
- public boolean equals(Object other) {
- if (this == other)
- return true;
- if (other instanceof MaxNonCompetitiveBoostAttributeImpl) {
- final MaxNonCompetitiveBoostAttributeImpl o = (MaxNonCompetitiveBoostAttributeImpl) other;
- return (o.maxNonCompetitiveBoost == maxNonCompetitiveBoost)
- && (o.competitiveTerm == null ? competitiveTerm == null : o.competitiveTerm.equals(competitiveTerm));
- }
- return false;
- }
-
- @Override
- public int hashCode() {
- int hash = Float.floatToIntBits(maxNonCompetitiveBoost);
- if (competitiveTerm != null) hash = 31 * hash + competitiveTerm.hashCode();
- return hash;
- }
@Override
public void copyTo(AttributeImpl target) {
Index: lucene/src/java/org/apache/lucene/util/AttributeImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/AttributeImpl.java (revision 1060792)
+++ lucene/src/java/org/apache/lucene/util/AttributeImpl.java (working copy)
@@ -20,6 +20,8 @@
import java.io.Serializable;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
+import java.lang.ref.WeakReference;
+import java.util.LinkedList;
/**
* Base class for Attributes that can be added to a
@@ -37,72 +39,80 @@
public abstract void clear();
/**
- * The default implementation of this method accesses all declared
- * fields of this object and prints the values in the following syntax:
+ * This method returns the current attribute values as a string in the following format
+ * by calling the {@link #reflectWith(AttributeReflector)} method:
*
+ *
+ * iff {@code prependAttClass=true}: {@code "AttributeClass#key=value,AttributeClass#key=value"}
+ * iff {@code prependAttClass=false}: {@code "key=value,key=value"}
+ *
+ *
+ * @see #reflectWith(AttributeReflector)
+ */
+ public final String reflectAsString(final boolean prependAttClass) {
+ final StringBuilder buffer = new StringBuilder();
+ reflectWith(new AttributeReflector() {
+ public void reflect(Class extends Attribute> attClass, String key, Object value) {
+ if (buffer.length() > 0) {
+ buffer.append(',');
+ }
+ if (prependAttClass) {
+ buffer.append(attClass.getName()).append('#');
+ }
+ buffer.append(key).append('=').append((value == null) ? "null" : value);
+ }
+ });
+ return buffer.toString();
+ }
+
+ /**
+ * This method is for introspection of attributes, it should simply
+ * add the key/values this attribute holds to the given {@link AttributeReflector}.
+ *
+ * The default implementation calls {@link AttributeReflector#reflect} for all
+ * non-static fields from the implementing class, using the field name as key
+ * and the field value as value. The Attribute class is also determined by reflection.
+ * Please note that the default implementation can only handle single-Attribute
+ * implementations.
+ *
+ *
Custom implementations look like this (e.g. for a combined attribute implementation):
*
- * public String toString() {
- * return "start=" + startOffset + ",end=" + endOffset;
+ * public void reflectWith(AttributeReflector reflector) {
+ * reflector.reflect(CharTermAttribute.class, "term", term());
+ * reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", getPositionIncrement());
* }
*
- *
- * This method may be overridden by subclasses.
+ *
+ * If you implement this method, make sure that for each invocation, the same set of {@link Attribute}
+ * interfaces and keys are passed to {@link AttributeReflector#reflect} in the same order, but possibly
+ * different values. So don't automatically exclude e.g. {@code null} properties!
+ *
+ * @see #reflectAsString(boolean)
*/
- @Override
- public String toString() {
- StringBuilder buffer = new StringBuilder();
- Class> clazz = this.getClass();
- Field[] fields = clazz.getDeclaredFields();
+ public void reflectWith(AttributeReflector reflector) {
+ final Class extends AttributeImpl> clazz = this.getClass();
+ final LinkedList>> interfaces = AttributeSource.getAttributeInterfaces(clazz);
+ if (interfaces.size() != 1) {
+ throw new UnsupportedOperationException(clazz.getName() +
+ " implements more than one Attribute interface, the default reflectWith() implementation cannot handle this.");
+ }
+ final Class extends Attribute> interf = interfaces.getFirst().get();
+ final Field[] fields = clazz.getDeclaredFields();
try {
for (int i = 0; i < fields.length; i++) {
- Field f = fields[i];
+ final Field f = fields[i];
if (Modifier.isStatic(f.getModifiers())) continue;
f.setAccessible(true);
- Object value = f.get(this);
- if (buffer.length()>0) {
- buffer.append(',');
- }
- if (value == null) {
- buffer.append(f.getName() + "=null");
- } else {
- buffer.append(f.getName() + "=" + value);
- }
+ reflector.reflect(interf, f.getName(), f.get(this));
}
} catch (IllegalAccessException e) {
// this should never happen, because we're just accessing fields
// from 'this'
throw new RuntimeException(e);
}
-
- return buffer.toString();
}
/**
- * Subclasses must implement this method and should compute
- * a hashCode similar to this:
- *
- * public int hashCode() {
- * int code = startOffset;
- * code = code * 31 + endOffset;
- * return code;
- * }
- *
- *
- * see also {@link #equals(Object)}
- */
- @Override
- public abstract int hashCode();
-
- /**
- * All values used for computation of {@link #hashCode()}
- * should be checked here for equality.
- *
- * see also {@link Object#equals(Object)}
- */
- @Override
- public abstract boolean equals(Object other);
-
- /**
* Copies the values from this Attribute into the passed-in
* target attribute. The target implementation must support all the
* Attributes this implementation supports.
Index: lucene/src/java/org/apache/lucene/util/AttributeSource.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/AttributeSource.java (revision 1060792)
+++ lucene/src/java/org/apache/lucene/util/AttributeSource.java (working copy)
@@ -180,20 +180,9 @@
private static final WeakHashMap,LinkedList>>> knownImplClasses =
new WeakHashMap,LinkedList>>>();
- /** Expert: Adds a custom AttributeImpl instance with one or more Attribute interfaces.
- * Please note: It is not guaranteed, that att is added to
- * the AttributeSource, because the provided attributes may already exist.
- * You should always retrieve the wanted attributes using {@link #getAttribute} after adding
- * with this method and cast to your class.
- * The recommended way to use custom implementations is using an {@link AttributeFactory}.
- *
- */
- public void addAttributeImpl(final AttributeImpl att) {
- final Class extends AttributeImpl> clazz = att.getClass();
- if (attributeImpls.containsKey(clazz)) return;
- LinkedList>> foundInterfaces;
+ static LinkedList>> getAttributeInterfaces(final Class extends AttributeImpl> clazz) {
synchronized(knownImplClasses) {
- foundInterfaces = knownImplClasses.get(clazz);
+ LinkedList>> foundInterfaces = knownImplClasses.get(clazz);
if (foundInterfaces == null) {
// we have a strong reference to the class instance holding all interfaces in the list (parameter "att"),
// so all WeakReferences are never evicted by GC
@@ -210,7 +199,23 @@
actClazz = actClazz.getSuperclass();
} while (actClazz != null);
}
+ return foundInterfaces;
}
+ }
+
+ /** Expert: Adds a custom AttributeImpl instance with one or more Attribute interfaces.
+ * Please note: It is not guaranteed, that att is added to
+ * the AttributeSource, because the provided attributes may already exist.
+ * You should always retrieve the wanted attributes using {@link #getAttribute} after adding
+ * with this method and cast to your class.
+ * The recommended way to use custom implementations is using an {@link AttributeFactory}.
+ *
+ */
+ public final void addAttributeImpl(final AttributeImpl att) {
+ final Class extends AttributeImpl> clazz = att.getClass();
+ if (attributeImpls.containsKey(clazz)) return;
+ final LinkedList>> foundInterfaces =
+ getAttributeInterfaces(clazz);
// add all interfaces of this AttributeImpl to the maps
for (WeakReference> curInterfaceRef : foundInterfaces) {
@@ -233,7 +238,7 @@
* already in this AttributeSource and returns it. Otherwise a
* new instance is created, added to this AttributeSource and returned.
*/
- public A addAttribute(Class attClass) {
+ public final A addAttribute(Class attClass) {
AttributeImpl attImpl = attributes.get(attClass);
if (attImpl == null) {
if (!(attClass.isInterface() && Attribute.class.isAssignableFrom(attClass))) {
@@ -248,7 +253,7 @@
}
/** Returns true, iff this AttributeSource has any attributes */
- public boolean hasAttributes() {
+ public final boolean hasAttributes() {
return !this.attributes.isEmpty();
}
@@ -256,7 +261,7 @@
* The caller must pass in a Class<? extends Attribute> value.
* Returns true, iff this AttributeSource contains the passed-in Attribute.
*/
- public boolean hasAttribute(Class extends Attribute> attClass) {
+ public final boolean hasAttribute(Class extends Attribute> attClass) {
return this.attributes.containsKey(attClass);
}
@@ -271,7 +276,7 @@
* available. If you want to only use the attribute, if it is available (to optimize
* consuming), use {@link #hasAttribute}.
*/
- public A getAttribute(Class attClass) {
+ public final A getAttribute(Class attClass) {
AttributeImpl attImpl = attributes.get(attClass);
if (attImpl == null) {
throw new IllegalArgumentException("This AttributeSource does not have the attribute '" + attClass.getName() + "'.");
@@ -319,7 +324,7 @@
* Resets all Attributes in this AttributeSource by calling
* {@link AttributeImpl#clear()} on each Attribute implementation.
*/
- public void clearAttributes() {
+ public final void clearAttributes() {
if (hasAttributes()) {
if (currentState == null) {
computeCurrentState();
@@ -334,7 +339,7 @@
* Captures the state of all Attributes. The return value can be passed to
* {@link #restoreState} to restore the state of this or another AttributeSource.
*/
- public State captureState() {
+ public final State captureState() {
if (!hasAttributes()) {
return null;
}
@@ -360,7 +365,7 @@
* reset its value to the default, in which case the caller should first
* call {@link TokenStream#clearAttributes()} on the targetStream.
*/
- public void restoreState(State state) {
+ public final void restoreState(State state) {
if (state == null) return;
do {
@@ -431,21 +436,53 @@
return false;
}
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder().append('(');
+ /**
+ * This method returns the current attribute values as a string in the following format
+ * by calling the {@link #reflectWith(AttributeReflector)} method:
+ *
+ *
+ * iff {@code prependAttClass=true}: {@code "AttributeClass#key=value,AttributeClass#key=value"}
+ * iff {@code prependAttClass=false}: {@code "key=value,key=value"}
+ *
+ *
+ * @see #reflectWith(AttributeReflector)
+ */
+ public final String reflectAsString(final boolean prependAttClass) {
+ final StringBuilder buffer = new StringBuilder();
+ reflectWith(new AttributeReflector() {
+ public void reflect(Class extends Attribute> attClass, String key, Object value) {
+ if (buffer.length() > 0) {
+ buffer.append(',');
+ }
+ if (prependAttClass) {
+ buffer.append(attClass.getName()).append('#');
+ }
+ buffer.append(key).append('=').append((value == null) ? "null" : value);
+ }
+ });
+ return buffer.toString();
+ }
+
+ /**
+ * This method is for introspection of attributes, it should simply
+ * add the key/values this AttributeSource holds to the given {@link AttributeReflector}.
+ *
+ * This method iterates over all Attribute implementations and calls the
+ * corresponding {@link AttributeImpl#reflectWith} method.
+ *
+ * @see AttributeImpl#reflectWith
+ */
+ public final void reflectWith(AttributeReflector reflector) {
if (hasAttributes()) {
if (currentState == null) {
computeCurrentState();
}
for (State state = currentState; state != null; state = state.next) {
- if (state != currentState) sb.append(',');
- sb.append(state.attribute.toString());
+ state.attribute.reflectWith(reflector);
}
}
- return sb.append(')').toString();
}
-
+
/**
* Performs a clone of all {@link AttributeImpl} instances returned in a new
* {@code AttributeSource} instance. This method can be used to e.g. create another TokenStream
@@ -453,7 +490,7 @@
* You can also use it as a (non-performant) replacement for {@link #captureState}, if you need to look
* into / modify the captured state.
*/
- public AttributeSource cloneAttributes() {
+ public final AttributeSource cloneAttributes() {
final AttributeSource clone = new AttributeSource(this.factory);
if (hasAttributes()) {
Index: lucene/src/test/org/apache/lucene/analysis/TestToken.java
===================================================================
--- lucene/src/test/org/apache/lucene/analysis/TestToken.java (revision 1060792)
+++ lucene/src/test/org/apache/lucene/analysis/TestToken.java (working copy)
@@ -22,8 +22,11 @@
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util._TestUtil;
import java.io.StringReader;
+import java.util.HashMap;
public class TestToken extends LuceneTestCase {
@@ -241,6 +244,22 @@
ts.addAttribute(TypeAttribute.class) instanceof Token);
}
+ public void testAttributeReflection() throws Exception {
+ Token t = new Token("foobar", 6, 22, 8);
+ _TestUtil.assertAttributeReflection(t,
+ new HashMap() {{
+ put(CharTermAttribute.class.getName() + "#term", "foobar");
+ put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar"));
+ put(OffsetAttribute.class.getName() + "#startOffset", 6);
+ put(OffsetAttribute.class.getName() + "#endOffset", 22);
+ put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 1);
+ put(PayloadAttribute.class.getName() + "#payload", null);
+ put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE);
+ put(FlagsAttribute.class.getName() + "#flags", 8);
+ }});
+ }
+
+
public static T assertCloneIsEqual(T att) {
@SuppressWarnings("unchecked")
T clone = (T) att.clone();
Index: lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java
===================================================================
--- lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java (revision 1060792)
+++ lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java (working copy)
@@ -19,7 +19,10 @@
import org.apache.lucene.analysis.TestToken;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util._TestUtil;
import java.nio.CharBuffer;
+import java.util.HashMap;
import java.util.Formatter;
import java.util.Locale;
import java.util.regex.Pattern;
@@ -126,6 +129,15 @@
assertNotSame(buf, copy.buffer());
}
+ public void testAttributeReflection() throws Exception {
+ CharTermAttributeImpl t = new CharTermAttributeImpl();
+ t.append("foobar");
+ _TestUtil.assertAttributeReflection(t, new HashMap() {{
+ put(CharTermAttribute.class.getName() + "#term", "foobar");
+ put(TermToBytesRefAttribute.class.getName() + "#bytes", new BytesRef("foobar"));
+ }});
+ }
+
public void testCharSequenceInterface() {
final String s = "0123456789";
final CharTermAttributeImpl t = new CharTermAttributeImpl();
Index: lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java
===================================================================
--- lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java (revision 0)
+++ lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java (revision 0)
@@ -0,0 +1,46 @@
+package org.apache.lucene.analysis.tokenattributes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.LuceneTestCase;
+
+import java.util.Collections;
+import java.util.HashMap;
+
+public class TestSimpleAttributeImpl extends LuceneTestCase {
+
+ // this checks using reflection API if the defaults are correct
+ public void testAttributes() {
+ _TestUtil.assertAttributeReflection(new PositionIncrementAttributeImpl(),
+ Collections.singletonMap(PositionIncrementAttribute.class.getName()+"#positionIncrement", 1));
+ _TestUtil.assertAttributeReflection(new FlagsAttributeImpl(),
+ Collections.singletonMap(FlagsAttribute.class.getName()+"#flags", 0));
+ _TestUtil.assertAttributeReflection(new TypeAttributeImpl(),
+ Collections.singletonMap(TypeAttribute.class.getName()+"#type", TypeAttribute.DEFAULT_TYPE));
+ _TestUtil.assertAttributeReflection(new PayloadAttributeImpl(),
+ Collections.singletonMap(PayloadAttribute.class.getName()+"#payload", null));
+ _TestUtil.assertAttributeReflection(new KeywordAttributeImpl(),
+ Collections.singletonMap(KeywordAttribute.class.getName()+"#keyword", false));
+ _TestUtil.assertAttributeReflection(new OffsetAttributeImpl(), new HashMap() {{
+ put(OffsetAttribute.class.getName()+"#startOffset", 0);
+ put(OffsetAttribute.class.getName()+"#endOffset", 0);
+ }});
+ }
+
+}
Property changes on: lucene\src\test\org\apache\lucene\analysis\tokenattributes\TestSimpleAttributeImpl.java
___________________________________________________________________
Added: svn:keywords
+ Date Author Id Revision HeadURL
Added: svn:eol-style
+ native
Index: lucene/src/test/org/apache/lucene/util/_TestUtil.java
===================================================================
--- lucene/src/test/org/apache/lucene/util/_TestUtil.java (revision 1060792)
+++ lucene/src/test/org/apache/lucene/util/_TestUtil.java (working copy)
@@ -22,7 +22,11 @@
import java.io.IOException;
import java.io.PrintStream;
import java.util.Random;
+import java.util.Map;
+import java.util.HashMap;
+import org.junit.Assert;
+
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexWriter;
@@ -238,4 +242,17 @@
((ConcurrentMergeScheduler) ms).setMaxMergeCount(3);
}
}
+
+ /** Checks some basic behaviour of an AttributeImpl
+ * @param reflectedValues contains a map with "AttributeClass#key" as values
+ */
+ public static void assertAttributeReflection(final AttributeImpl att, Map reflectedValues) {
+ final Map map = new HashMap();
+ att.reflectWith(new AttributeReflector() {
+ public void reflect(Class extends Attribute> attClass, String key, Object value) {
+ map.put(attClass.getName() + '#' + key, value);
+ }
+ });
+ Assert.assertEquals("Reflection does not produce same map", reflectedValues, map);
+ }
}
Index: lucene/src/test/org/apache/lucene/util/TestAttributeSource.java
===================================================================
--- lucene/src/test/org/apache/lucene/util/TestAttributeSource.java (revision 1060792)
+++ lucene/src/test/org/apache/lucene/util/TestAttributeSource.java (working copy)
@@ -109,34 +109,6 @@
assertEquals("TypeAttribute of original and clone must be equal", typeAtt2, typeAtt);
}
- public void testToStringAndMultiAttributeImplementations() {
- AttributeSource src = new AttributeSource();
- CharTermAttribute termAtt = src.addAttribute(CharTermAttribute.class);
- TypeAttribute typeAtt = src.addAttribute(TypeAttribute.class);
- termAtt.append("TestTerm");
- typeAtt.setType("TestType");
- assertEquals("Attributes should appear in original order", "("+termAtt.toString()+","+typeAtt.toString()+")", src.toString());
- Iterator it = src.getAttributeImplsIterator();
- assertTrue("Iterator should have 2 attributes left", it.hasNext());
- assertSame("First AttributeImpl from iterator should be termAtt", termAtt, it.next());
- assertTrue("Iterator should have 1 attributes left", it.hasNext());
- assertSame("Second AttributeImpl from iterator should be typeAtt", typeAtt, it.next());
- assertFalse("Iterator should have 0 attributes left", it.hasNext());
-
- src = new AttributeSource();
- src.addAttributeImpl(new Token());
- // this should not add a new attribute as Token implements CharTermAttribute, too
- termAtt = src.addAttribute(CharTermAttribute.class);
- assertTrue("CharTermAttribute should be implemented by Token", termAtt instanceof Token);
- // get the Token attribute and check, that it is the only one
- it = src.getAttributeImplsIterator();
- Token tok = (Token) it.next();
- assertFalse("There should be only one attribute implementation instance", it.hasNext());
-
- termAtt.setEmpty().append("TestTerm");
- assertEquals("Token should only printed once", "("+tok.toString()+")", src.toString());
- }
-
public void testDefaultAttributeFactory() throws Exception {
AttributeSource src = new AttributeSource();
Index: modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java
===================================================================
--- modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java (revision 1060792)
+++ modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java (working copy)
@@ -20,6 +20,7 @@
import java.io.Serializable;
import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
import com.ibm.icu.lang.UScript;
@@ -77,7 +78,7 @@
}
@Override
- public String toString() {
- return "script=" + getName();
+ public void reflectWith(AttributeReflector reflector) {
+ reflector.reflect(ScriptAttribute.class, "script", getName());
}
}
Property changes on: solr
___________________________________________________________________
Modified: svn:mergeinfo
Merged /lucene/dev/branches/branch_3x/solr:r1060784
Index: solr/common-build.xml
===================================================================
--- solr/common-build.xml (revision 1060792)
+++ solr/common-build.xml (working copy)
@@ -270,7 +270,7 @@
includeAntRuntime="${javac.includeAntRuntime}"
sourcepath=""
classpathref="@{classpathref}">
-
+
Index: solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java
===================================================================
--- solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java (revision 1060792)
+++ solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java (working copy)
@@ -1,243 +0,0 @@
-package org.apache.solr.handler;
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.commons.io.IOUtils;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.*;
-import org.apache.lucene.util.BytesRef;
-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.ContentStream;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.response.SolrQueryResponse;
-import org.apache.solr.schema.FieldType;
-import org.apache.solr.schema.IndexSchema;
-import org.apache.solr.schema.SchemaField;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import javax.xml.stream.XMLInputFactory;
-import javax.xml.stream.XMLStreamConstants;
-import javax.xml.stream.XMLStreamException;
-import javax.xml.stream.XMLStreamReader;
-import java.io.IOException;
-import java.io.Reader;
-import java.io.StringReader;
-import java.util.Collection;
-
-/**
- *
- * @deprecated Use {@link org.apache.solr.handler.DocumentAnalysisRequestHandler} instead.
- **/
-@Deprecated
-public class AnalysisRequestHandler extends RequestHandlerBase {
-
- public static Logger log = LoggerFactory.getLogger(AnalysisRequestHandler.class);
-
- private XMLInputFactory inputFactory;
-
- @Override
- public void init(NamedList args) {
- super.init(args);
-
- inputFactory = XMLInputFactory.newInstance();
- try {
- // The java 1.6 bundled stax parser (sjsxp) does not currently have a thread-safe
- // XMLInputFactory, as that implementation tries to cache and reuse the
- // XMLStreamReader. Setting the parser-specific "reuse-instance" property to false
- // prevents this.
- // All other known open-source stax parsers (and the bea ref impl)
- // have thread-safe factories.
- inputFactory.setProperty("reuse-instance", Boolean.FALSE);
- }
- catch (IllegalArgumentException ex) {
- // Other implementations will likely throw this exception since "reuse-instance"
- // isimplementation specific.
- log.debug("Unable to set the 'reuse-instance' property for the input factory: " + inputFactory);
- }
- }
-
- public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
- SolrParams params = req.getParams();
- Iterable streams = req.getContentStreams();
- if (streams != null) {
- for (ContentStream stream : req.getContentStreams()) {
- Reader reader = stream.getReader();
- try {
- XMLStreamReader parser = inputFactory.createXMLStreamReader(reader);
- NamedList result = processContent(parser, req.getSchema());
- rsp.add("response", result);
- }
- finally {
- IOUtils.closeQuietly(reader);
- }
- }
- }
- }
-
- NamedList processContent(XMLStreamReader parser,
- IndexSchema schema) throws XMLStreamException, IOException {
- NamedList result = new SimpleOrderedMap();
- while (true) {
- int event = parser.next();
- switch (event) {
- case XMLStreamConstants.END_DOCUMENT: {
- parser.close();
- return result;
- }
- case XMLStreamConstants.START_ELEMENT: {
- String currTag = parser.getLocalName();
- if ("doc".equals(currTag)) {
- log.trace("Tokenizing doc...");
-
- SolrInputDocument doc = readDoc(parser);
- SchemaField uniq = schema.getUniqueKeyField();
- NamedList>> theTokens = new SimpleOrderedMap>>();
- result.add(doc.getFieldValue(uniq.getName()).toString(), theTokens);
- for (String name : doc.getFieldNames()) {
- FieldType ft = schema.getFieldType(name);
- Analyzer analyzer = ft.getAnalyzer();
- Collection vals = doc.getFieldValues(name);
- for (Object val : vals) {
- Reader reader = new StringReader(val.toString());
- TokenStream tstream = analyzer.tokenStream(name, reader);
- NamedList> tokens = getTokens(tstream);
- theTokens.add(name, tokens);
- }
- }
- }
- break;
- }
- }
- }
- }
-
- static NamedList> getTokens(TokenStream tstream) throws IOException {
- // outer is namedList since order of tokens is important
- NamedList> tokens = new NamedList>();
- // TODO: support custom attributes
- CharTermAttribute termAtt = null;
- TermToBytesRefAttribute bytesAtt = null;
- if (tstream.hasAttribute(CharTermAttribute.class)) {
- termAtt = tstream.getAttribute(CharTermAttribute.class);
- } else if (tstream.hasAttribute(TermToBytesRefAttribute.class)) {
- bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class);
- }
- final OffsetAttribute offsetAtt = tstream.addAttribute(OffsetAttribute.class);
- final TypeAttribute typeAtt = tstream.addAttribute(TypeAttribute.class);
- final PositionIncrementAttribute posIncAtt = tstream.addAttribute(PositionIncrementAttribute.class);
-
- final BytesRef bytes = new BytesRef();
- while (tstream.incrementToken()) {
- NamedList token = new SimpleOrderedMap();
- tokens.add("token", token);
- if (termAtt != null) {
- token.add("value", termAtt.toString());
- }
- if (bytesAtt != null) {
- bytesAtt.toBytesRef(bytes);
- // TODO: This is incorrect when numeric fields change in later lucene versions. It should use BytesRef directly!
- token.add("value", bytes.utf8ToString());
- }
- token.add("start", offsetAtt.startOffset());
- token.add("end", offsetAtt.endOffset());
- token.add("posInc", posIncAtt.getPositionIncrement());
- token.add("type", typeAtt.type());
- //TODO: handle payloads
- }
- return tokens;
- }
-
- SolrInputDocument readDoc(XMLStreamReader parser) throws XMLStreamException {
- SolrInputDocument doc = new SolrInputDocument();
-
- StringBuilder text = new StringBuilder();
- String name = null;
- String attrName = "";
- float boost = 1.0f;
- boolean isNull = false;
- while (true) {
- int event = parser.next();
- switch (event) {
- // Add everything to the text
- case XMLStreamConstants.SPACE:
- case XMLStreamConstants.CDATA:
- case XMLStreamConstants.CHARACTERS:
- text.append(parser.getText());
- break;
-
- case XMLStreamConstants.END_ELEMENT:
- if ("doc".equals(parser.getLocalName())) {
- return doc;
- } else if ("field".equals(parser.getLocalName())) {
- if (!isNull) {
- doc.addField(name, text.toString(), boost);
- boost = 1.0f;
- }
- }
- break;
-
- case XMLStreamConstants.START_ELEMENT:
- text.setLength(0);
- String localName = parser.getLocalName();
- if (!"field".equals(localName)) {
- log.warn("unexpected XML tag doc/" + localName);
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
- "unexpected XML tag doc/" + localName);
- }
-
- String attrVal = "";
- for (int i = 0; i < parser.getAttributeCount(); i++) {
- attrName = parser.getAttributeLocalName(i);
- attrVal = parser.getAttributeValue(i);
- if ("name".equals(attrName)) {
- name = attrVal;
- }
- }
- break;
- }
- }
- }
-
-
- //////////////////////// SolrInfoMBeans methods //////////////////////
- @Override
- public String getDescription() {
- return "Provide Analysis of text";
- }
-
- @Override
- public String getVersion() {
- return "$Revision$";
- }
-
- @Override
- public String getSourceId() {
- return "$Id$";
- }
-
- @Override
- public String getSource() {
- return "$URL$";
- }
-
-}
Index: solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
===================================================================
--- solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java (revision 1060792)
+++ solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java (working copy)
@@ -20,10 +20,14 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.CharStream;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.index.Payload;
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.AttributeReflector;
+import org.apache.lucene.util.SorterTemplate;
import org.apache.solr.analysis.CharFilterFactory;
import org.apache.solr.analysis.TokenFilterFactory;
import org.apache.solr.analysis.TokenizerChain;
@@ -34,10 +38,13 @@
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
+import org.apache.solr.util.ByteUtils;
+import org.apache.noggit.CharArr;
import java.io.IOException;
import java.io.StringReader;
import java.util.*;
+import java.math.BigInteger;
/**
* A base class for all analysis request handlers.
@@ -47,7 +54,7 @@
*/
public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
- public static final Set EMPTY_STRING_SET = Collections.emptySet();
+ public static final Set EMPTY_BYTES_SET = Collections.emptySet();
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
rsp.add("analysis", doAnalysis(req));
@@ -107,7 +114,7 @@
}
TokenStream tokenStream = tfac.create(tokenizerChain.charStream(new StringReader(value)));
- List tokens = analyzeTokenStream(tokenStream);
+ List tokens = analyzeTokenStream(tokenStream);
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
@@ -115,7 +122,7 @@
for (TokenFilterFactory tokenFilterFactory : filtfacs) {
tokenStream = tokenFilterFactory.create(listBasedTokenStream);
- List tokenList = analyzeTokenStream(tokenStream);
+ List tokenList = analyzeTokenStream(tokenStream);
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokenList, context));
listBasedTokenStream = new ListBasedTokenStream(tokenList);
}
@@ -126,14 +133,24 @@
/**
* Analyzes the given text using the given analyzer and returns the produced tokens.
*
- * @param value The value to analyze.
+ * @param query The query to analyze.
* @param analyzer The analyzer to use.
- *
- * @return The produces token list.
*/
- protected List analyzeValue(String value, Analyzer analyzer) {
- TokenStream tokenStream = analyzer.tokenStream("", new StringReader(value));
- return analyzeTokenStream(tokenStream);
+ protected Set getQueryTokenSet(String query, Analyzer analyzer) {
+ final Set tokens = new HashSet();
+ final TokenStream tokenStream = analyzer.tokenStream("", new StringReader(query));
+ final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
+ try {
+ tokenStream.reset();
+ while (tokenStream.incrementToken()) {
+ final BytesRef bytes = new BytesRef();
+ bytesAtt.toBytesRef(bytes);
+ tokens.add(bytes);
+ }
+ } catch (IOException ioe) {
+      throw new RuntimeException("Error occurred while iterating over tokenstream", ioe);
+ }
+ return tokens;
}
/**
@@ -143,41 +160,17 @@
*
* @return List of tokens produced from the TokenStream
*/
- private List analyzeTokenStream(TokenStream tokenStream) {
- List tokens = new ArrayList();
-
- // TODO change this API to support custom attributes
- CharTermAttribute termAtt = null;
- TermToBytesRefAttribute bytesAtt = null;
- if (tokenStream.hasAttribute(CharTermAttribute.class)) {
- termAtt = tokenStream.getAttribute(CharTermAttribute.class);
- } else if (tokenStream.hasAttribute(TermToBytesRefAttribute.class)) {
- bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
- }
- final OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
- final TypeAttribute typeAtt = tokenStream.addAttribute(TypeAttribute.class);
- final PositionIncrementAttribute posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
- final FlagsAttribute flagsAtt = tokenStream.addAttribute(FlagsAttribute.class);
- final PayloadAttribute payloadAtt = tokenStream.addAttribute(PayloadAttribute.class);
-
+ private List analyzeTokenStream(TokenStream tokenStream) {
+ List tokens = new ArrayList();
+ // for backwards compatibility, add all "common" attributes
+ tokenStream.addAttribute(PositionIncrementAttribute.class);
+ tokenStream.addAttribute(OffsetAttribute.class);
+ tokenStream.addAttribute(TypeAttribute.class);
final BytesRef bytes = new BytesRef();
try {
+ tokenStream.reset();
while (tokenStream.incrementToken()) {
- Token token = new Token();
- if (termAtt != null) {
- token.setEmpty().append(termAtt);
- }
- if (bytesAtt != null) {
- bytesAtt.toBytesRef(bytes);
- // TODO: This is incorrect when numeric fields change in later lucene versions. It should use BytesRef directly!
- token.setEmpty().append(bytes.utf8ToString());
- }
- token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
- token.setType(typeAtt.type());
- token.setFlags(flagsAtt.getFlags());
- token.setPayload(payloadAtt.getPayload());
- token.setPositionIncrement(posIncAtt.getPositionIncrement());
- tokens.add((Token) token.clone());
+ tokens.add(tokenStream.cloneAttributes());
}
} catch (IOException ioe) {
throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
@@ -186,6 +179,14 @@
return tokens;
}
+ // a static mapping of the reflected attribute keys to the names used in Solr 1.4
+ static Map ATTRIBUTE_MAPPING = Collections.unmodifiableMap(new HashMap() {{
+ put(CharTermAttribute.class.getName() + "#term", "raw_text");
+ put(OffsetAttribute.class.getName() + "#startOffset", "start");
+ put(OffsetAttribute.class.getName() + "#endOffset", "end");
+ put(TypeAttribute.class.getName() + "#type", "type");
+ }});
+
/**
* Converts the list of Tokens to a list of NamedLists representing the tokens.
*
@@ -194,42 +195,88 @@
*
* @return List of NamedLists containing the relevant information taken from the tokens
*/
- private List convertTokensToNamedLists(List tokens, AnalysisContext context) {
- List tokensNamedLists = new ArrayList();
+ private List convertTokensToNamedLists(final List tokens, AnalysisContext context) {
+ final List tokensNamedLists = new ArrayList();
- Collections.sort(tokens, new Comparator() {
- public int compare(Token o1, Token o2) {
- return o1.endOffset() - o2.endOffset();
+ final int[] positions = new int[tokens.size()];
+ int position = 0;
+ for (int i = 0, c = tokens.size(); i < c; i++) {
+ AttributeSource token = tokens.get(i);
+ position += token.addAttribute(PositionIncrementAttribute.class).getPositionIncrement();
+ positions[i] = position;
+ }
+
+    // sort the tokens by absolute position
+ new SorterTemplate() {
+ @Override
+ protected void swap(int i, int j) {
+ Collections.swap(tokens, i, j);
}
- });
+
+ @Override
+ protected int compare(int i, int j) {
+ return positions[i] - positions[j];
+ }
- int position = 0;
+ @Override
+ protected void setPivot(int i) {
+ pivot = positions[i];
+ }
+
+ @Override
+ protected int comparePivot(int j) {
+ return pivot - positions[j];
+ }
+
+ private int pivot;
+ }.mergeSort(0, tokens.size() - 1);
FieldType fieldType = context.getFieldType();
- for (Token token : tokens) {
- NamedList tokenNamedList = new SimpleOrderedMap();
+ for (int i = 0, c = tokens.size(); i < c; i++) {
+ AttributeSource token = tokens.get(i);
+ final NamedList tokenNamedList = new SimpleOrderedMap();
+ final BytesRef rawBytes = new BytesRef();
+ token.getAttribute(TermToBytesRefAttribute.class).toBytesRef(rawBytes);
- String text = fieldType.indexedToReadable(token.toString());
- tokenNamedList.add("text", text);
- if (!text.equals(token.toString())) {
- tokenNamedList.add("raw_text", token.toString());
- }
- tokenNamedList.add("type", token.type());
- tokenNamedList.add("start", token.startOffset());
- tokenNamedList.add("end", token.endOffset());
+ CharArr text = new CharArr(rawBytes.length);
+ fieldType.indexedToReadable(rawBytes, text);
- position += token.getPositionIncrement();
- tokenNamedList.add("position", position);
+ tokenNamedList.add("text", text.toString());
+ tokenNamedList.add("raw_bytes", rawBytes.toString());
- if (context.getTermsToMatch().contains(token.toString())) {
+ if (context.getTermsToMatch().contains(rawBytes)) {
tokenNamedList.add("match", true);
}
- if (token.getPayload() != null) {
- tokenNamedList.add("payload", token.getPayload());
- }
+ tokenNamedList.add("position", positions[i]);
+ token.reflectWith(new AttributeReflector() {
+ public void reflect(Class extends Attribute> attClass, String key, Object value) {
+ // leave out position and bytes term
+ if (TermToBytesRefAttribute.class.isAssignableFrom(attClass))
+ return;
+ if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
+ return;
+
+ String k = attClass.getName() + '#' + key;
+
+ // map keys for "standard attributes":
+ if (ATTRIBUTE_MAPPING.containsKey(k)) {
+ k = ATTRIBUTE_MAPPING.get(k);
+ }
+
+ if (value instanceof Payload) {
+ final Payload p = (Payload) value;
+ value = new BytesRef(p.getData()).toString();
+ } else if (value instanceof BytesRef) {
+ value = value.toString();
+ }
+
+ tokenNamedList.add(k, value);
+ }
+ });
+
tokensNamedLists.add(tokenNamedList);
}
@@ -261,38 +308,27 @@
*/
// TODO refactor to support custom attributes
protected final static class ListBasedTokenStream extends TokenStream {
- private final List tokens;
- private Iterator tokenIterator;
+ private final List tokens;
+ private Iterator tokenIterator;
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
- private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
- private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
- private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
- private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
/**
* Creates a new ListBasedTokenStream which uses the given tokens as its token source.
*
* @param tokens Source of tokens to be used
*/
- ListBasedTokenStream(List tokens) {
+ ListBasedTokenStream(List tokens) {
this.tokens = tokens;
tokenIterator = tokens.iterator();
}
- /**
- * {@inheritDoc}
- */
@Override
public boolean incrementToken() throws IOException {
if (tokenIterator.hasNext()) {
- Token next = tokenIterator.next();
- termAtt.copyBuffer(next.buffer(), 0, next.length());
- typeAtt.setType(next.type());
- offsetAtt.setOffset(next.startOffset(), next.endOffset());
- flagsAtt.setFlags(next.getFlags());
- payloadAtt.setPayload(next.getPayload());
- posIncAtt.setPositionIncrement(next.getPositionIncrement());
+ AttributeSource next = tokenIterator.next();
+ Iterator> atts = next.getAttributeClassesIterator();
+ while (atts.hasNext()) // make sure all att impls in the token exist here
+ addAttribute(atts.next());
+ next.copyTo(this);
return true;
} else {
return false;
@@ -314,7 +350,7 @@
private final String fieldName;
private final FieldType fieldType;
private final Analyzer analyzer;
- private final Set termsToMatch;
+ private final Set termsToMatch;
/**
* Constructs a new AnalysisContext with a given field tpe, analyzer and
@@ -328,7 +364,7 @@
* @param termsToMatch Holds all the terms that should match during the
* analysis process.
*/
- public AnalysisContext(FieldType fieldType, Analyzer analyzer, Set termsToMatch) {
+ public AnalysisContext(FieldType fieldType, Analyzer analyzer, Set termsToMatch) {
this(null, fieldType, analyzer, termsToMatch);
}
@@ -343,7 +379,7 @@
*
*/
public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer) {
- this(fieldName, fieldType, analyzer, EMPTY_STRING_SET);
+ this(fieldName, fieldType, analyzer, EMPTY_BYTES_SET);
}
/**
@@ -359,7 +395,7 @@
* @param termsToMatch Holds all the terms that should match during the
* analysis process.
*/
- public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer, Set termsToMatch) {
+ public AnalysisContext(String fieldName, FieldType fieldType, Analyzer analyzer, Set termsToMatch) {
this.fieldName = fieldName;
this.fieldType = fieldType;
this.analyzer = analyzer;
@@ -378,7 +414,7 @@
return analyzer;
}
- public Set getTermsToMatch() {
+ public Set getTermsToMatch() {
return termsToMatch;
}
}
Index: solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java
===================================================================
--- solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java (revision 1060792)
+++ solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java (working copy)
@@ -19,7 +19,9 @@
import org.apache.commons.io.IOUtils;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesRef;
import org.apache.solr.client.solrj.request.DocumentAnalysisRequest;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
@@ -216,21 +218,20 @@
FieldType fieldType = schema.getFieldType(name);
- Set termsToMatch = new HashSet();
- if (request.getQuery() != null && request.isShowMatch()) {
- try {
- List tokens = analyzeValue(request.getQuery(), fieldType.getQueryAnalyzer());
- for (Token token : tokens) {
- termsToMatch.add(token.toString());
- }
- } catch (Exception e) {
- // ignore analysis exceptions since we are applying arbitrary text to all fields
- }
+ final String queryValue = request.getQuery();
+ Set termsToMatch;
+ try {
+ termsToMatch = (queryValue != null && request.isShowMatch())
+ ? getQueryTokenSet(queryValue, fieldType.getQueryAnalyzer())
+ : EMPTY_BYTES_SET;
+ } catch (Exception e) {
+ // ignore analysis exceptions since we are applying arbitrary text to all fields
+ termsToMatch = EMPTY_BYTES_SET;
}
if (request.getQuery() != null) {
try {
- AnalysisContext analysisContext = new AnalysisContext(fieldType, fieldType.getQueryAnalyzer(), EMPTY_STRING_SET);
+ AnalysisContext analysisContext = new AnalysisContext(fieldType, fieldType.getQueryAnalyzer(), EMPTY_BYTES_SET);
fieldTokens.add("query", analyzeValue(request.getQuery(), analysisContext));
} catch (Exception e) {
// ignore analysis exceptions since we are applying arbitrary text to all fields
Index: solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java
===================================================================
--- solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java (revision 1060792)
+++ solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java (working copy)
@@ -17,7 +17,9 @@
package org.apache.solr.handler;
-import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesRef;
import org.apache.solr.client.solrj.request.FieldAnalysisRequest;
import org.apache.solr.common.params.AnalysisParams;
import org.apache.solr.common.params.CommonParams;
@@ -30,10 +32,7 @@
import org.apache.solr.schema.IndexSchema;
import org.apache.commons.io.IOUtils;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import java.util.*;
import java.io.Reader;
import java.io.IOException;
@@ -222,14 +221,10 @@
*/
private NamedList analyzeValues(FieldAnalysisRequest analysisRequest, FieldType fieldType, String fieldName) {
- Set termsToMatch = new HashSet();
- String queryValue = analysisRequest.getQuery();
- if (queryValue != null && analysisRequest.isShowMatch()) {
- List tokens = analyzeValue(queryValue, fieldType.getQueryAnalyzer());
- for (Token token : tokens) {
- termsToMatch.add(token.toString());
- }
- }
+ final String queryValue = analysisRequest.getQuery();
+ final Set termsToMatch = (queryValue != null && analysisRequest.isShowMatch())
+ ? getQueryTokenSet(queryValue, fieldType.getQueryAnalyzer())
+ : EMPTY_BYTES_SET;
NamedList analyzeResults = new SimpleOrderedMap();
if (analysisRequest.getFieldValue() != null) {
Index: solr/src/webapp/web/admin/analysis.jsp
===================================================================
--- solr/src/webapp/web/admin/analysis.jsp (revision 1060792)
+++ solr/src/webapp/web/admin/analysis.jsp (working copy)
@@ -24,6 +24,7 @@
org.apache.lucene.analysis.CharReader,
org.apache.lucene.analysis.CharStream,
org.apache.lucene.analysis.tokenattributes.*,
+ org.apache.lucene.util.AttributeReflector,
org.apache.solr.analysis.CharFilterFactory,
org.apache.solr.analysis.TokenFilterFactory,
org.apache.solr.analysis.TokenizerChain,
@@ -31,7 +32,8 @@
org.apache.solr.schema.FieldType,
org.apache.solr.schema.SchemaField,
org.apache.solr.common.util.XML,
- javax.servlet.jsp.JspWriter,java.io.IOException
+ javax.servlet.jsp.JspWriter,java.io.IOException,
+ org.apache.noggit.CharArr
"%>
<%@ page import="java.io.Reader"%>
<%@ page import="java.io.StringReader"%>
@@ -39,8 +41,6 @@
<%@ page import="java.math.BigInteger" %>
<%-- $Id$ --%>
-<%-- $Source: /cvs/main/searching/org.apache.solrolarServer/resources/admin/analysis.jsp,v $ --%>
-<%-- $Name: $ --%>
<%@include file="header.jsp" %>
@@ -71,19 +71,19 @@
- Field
+ Field
- >name
- >type
+ >name
+ >type
-
+
- Field value (Index)
+ Field value (Index)
verbose output
>
-
+
- Field value (Query)
+ Field value (Query)
verbose output
>
-
+
@@ -115,7 +115,7 @@
-
+
@@ -148,24 +148,28 @@
}
if (field!=null) {
- HashSet matches = null;
+ HashSet matches = null;
if (qval!="" && highlight) {
Reader reader = new StringReader(qval);
Analyzer analyzer = field.getType().getQueryAnalyzer();
TokenStream tstream = analyzer.reusableTokenStream(field.getName(),reader);
+ TermToBytesRefAttribute bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class);
tstream.reset();
- List tokens = getTokens(tstream);
- matches = new HashSet();
- for (AttributeSource t : tokens) { matches.add( new Tok(t,0)); }
+ matches = new HashSet();
+ while (tstream.incrementToken()) {
+ final BytesRef bytes = new BytesRef();
+ bytesAtt.toBytesRef(bytes);
+ matches.add(bytes);
+ }
}
if (val!="") {
out.println("Index Analyzer ");
- doAnalyzer(out, field, val, false, verbose,matches);
+ doAnalyzer(out, field, val, false, verbose, matches);
}
if (qval!="") {
out.println("Query Analyzer ");
- doAnalyzer(out, field, qval, true, qverbose,null);
+ doAnalyzer(out, field, qval, true, qverbose, null);
}
}
@@ -177,7 +181,7 @@
<%!
- private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set match) throws Exception {
+ private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set match) throws Exception {
FieldType ft = field.getType();
Analyzer analyzer = queryAnalyser ?
@@ -240,7 +244,7 @@
tstream.reset();
List tokens = getTokens(tstream);
if (verbose) {
- writeHeader(out, analyzer.getClass(), new HashMap());
+ writeHeader(out, analyzer.getClass(), Collections.EMPTY_MAP);
}
writeTokens(out, tokens, ft, verbose, match);
}
@@ -249,52 +253,53 @@
static List getTokens(TokenStream tstream) throws IOException {
List tokens = new ArrayList();
-
- while (true) {
- if (!tstream.incrementToken())
- break;
- else {
- tokens.add(tstream.cloneAttributes());
- }
+ tstream.reset();
+ while (tstream.incrementToken()) {
+ tokens.add(tstream.cloneAttributes());
}
return tokens;
}
-
+ private static class ReflectItem {
+ final Class extends Attribute> attClass;
+ final String key;
+ final Object value;
+
+ ReflectItem(Class extends Attribute> attClass, String key, Object value) {
+ this.attClass = attClass;
+ this.key = key;
+ this.value = value;
+ }
+ }
+
private static class Tok {
- AttributeSource token;
- int pos;
+ final BytesRef bytes = new BytesRef();
+ final int pos;
+ final List reflected = new ArrayList();
+
Tok(AttributeSource token, int pos) {
- this.token=token;
- this.pos=pos;
+ token.getAttribute(TermToBytesRefAttribute.class).toBytesRef(bytes);
+ this.pos = pos;
+ token.reflectWith(new AttributeReflector() {
+ public void reflect(Class extends Attribute> attClass, String key, Object value) {
+ // leave out position and raw term
+ if (TermToBytesRefAttribute.class.isAssignableFrom(attClass))
+ return;
+ if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
+ return;
+ if ("term".equals(key) && CharTermAttribute.class.isAssignableFrom(attClass))
+ key = "rawTerm";
+ reflected.add(new ReflectItem(attClass, key, value));
+ }
+ });
}
-
- public boolean equals(Object o) {
- return ((Tok)o).token.toString().equals(token.toString());
- }
- public int hashCode() {
- return token.toString().hashCode();
- }
- public String toString() {
- return token.toString();
- }
- public String toPrintableString() {
- TermToBytesRefAttribute att = token.addAttribute(TermToBytesRefAttribute.class);
- if (att instanceof CharTermAttribute)
- return att.toString();
- else {
- BytesRef bytes = new BytesRef();
- att.toBytesRef(bytes);
- return bytes.toString();
- }
- }
}
- private static interface ToStr {
- public String toStr(Object o);
+ private static interface TokToStr {
+ public String toStr(Tok o);
}
- private static void printRow(JspWriter out, String header, List[] arrLst, ToStr converter, boolean multival, boolean verbose, Set match) throws IOException {
+ private static void printRow(JspWriter out, String header, String headerTitle, List[] arrLst, TokToStr converter, boolean multival, boolean verbose, Set match) throws IOException {
// find the maximum number of terms for any position
int maxSz=1;
if (multival) {
@@ -308,7 +313,13 @@
out.println("");
if (idx==0 && verbose) {
if (header != null) {
- out.print("");
+ out.print(" ");
XML.escapeCharData(header,out);
out.println(" ");
}
@@ -317,7 +328,7 @@
for (int posIndex=0; posIndex lst = arrLst[posIndex];
if (lst.size() <= idx) continue;
- if (match!=null && match.contains(lst.get(idx))) {
+ if (match!=null && match.contains(lst.get(idx).bytes)) {
out.print(" args) throws IOException {
out.print("");
out.print(clazz.getName());
@@ -359,19 +361,14 @@
// readable, raw, pos, type, start/end
- static void writeTokens(JspWriter out, List tokens, final FieldType ft, boolean verbose, Set match) throws IOException {
+ static void writeTokens(JspWriter out, List tokens, final FieldType ft, boolean verbose, Set match) throws IOException {
// Use a map to tell what tokens are in what positions
// because some tokenizers/filters may do funky stuff with
// very large increments, or negative increments.
HashMap> map = new HashMap>();
- boolean needRaw=false;
- int pos=0;
+ int pos=0, reflectionCount = -1;
for (AttributeSource t : tokens) {
- if (!t.toString().equals(ft.indexedToReadable(t.toString()))) {
- needRaw=true;
- }
-
pos += t.addAttribute(PositionIncrementAttribute.class).getPositionIncrement();
List lst = map.get(pos);
if (lst==null) {
@@ -379,117 +376,67 @@
map.put(pos,lst);
}
Tok tok = new Tok(t,pos);
+ // sanity check
+ if (reflectionCount < 0) {
+ reflectionCount = tok.reflected.size();
+ } else {
+ if (reflectionCount != tok.reflected.size())
+ throw new RuntimeException("Should not happen: Number of reflected entries differs for position=" + pos);
+ }
lst.add(tok);
}
List[] arr = (List[])map.values().toArray(new ArrayList[map.size()]);
- /* Jetty 6.1.3 miscompiles this generics version...
- Arrays.sort(arr, new Comparator>() {
- public int compare(List toks, List toks1) {
- return toks.get(0).pos - toks1.get(0).pos;
- }
- }
- */
-
+ // Jetty 6.1.3 miscompiles a generics-enabled version..., without generics:
Arrays.sort(arr, new Comparator() {
public int compare(Object toks, Object toks1) {
return ((List)toks).get(0).pos - ((List)toks1).get(0).pos;
}
- }
+ });
-
- );
-
out.println("");
if (verbose) {
- printRow(out,"term position", arr, new ToStr() {
- public String toStr(Object o) {
- return Integer.toString(((Tok)o).pos);
+ printRow(out, "position", "calculated from " + PositionIncrementAttribute.class.getName(), arr, new TokToStr() {
+ public String toStr(Tok t) {
+ return Integer.toString(t.pos);
}
- }
- ,false
- ,verbose
- ,null);
+ },false,verbose,null);
}
-
- printRow(out,"term text", arr, new ToStr() {
- public String toStr(Object o) {
- return ft.indexedToReadable( ((Tok)o).toPrintableString() );
+ printRow(out, "term", "indexedToReadable applied to " + TermToBytesRefAttribute.class.getName(), arr, new TokToStr() {
+ public String toStr(Tok t) {
+ CharArr text = new CharArr(t.bytes.length);
+ ft.indexedToReadable(t.bytes, text);
+ return text.toString();
}
- }
- ,true
- ,verbose
- ,match
- );
+ },true,verbose,match);
- if (needRaw) {
- printRow(out,"raw text", arr, new ToStr() {
- public String toStr(Object o) {
- // page is UTF-8, so anything goes.
- return ((Tok)o).toPrintableString();
- }
- }
- ,true
- ,verbose
- ,match
- );
- }
-
if (verbose) {
- printRow(out,"term type", arr, new ToStr() {
- public String toStr(Object o) {
- String tt = ((Tok)o).token.addAttribute(TypeAttribute.class).type();
- if (tt == null) {
- return "null";
- } else {
- return tt;
- }
+ printRow(out, "rawBytes", TermToBytesRefAttribute.class.getName(), arr, new TokToStr() {
+ public String toStr(Tok t) {
+ return t.bytes.toString();
}
- }
- ,true
- ,verbose,
- null
- );
- }
+ },true,verbose,match);
- if (verbose) {
- printRow(out,"source start,end", arr, new ToStr() {
- public String toStr(Object o) {
- AttributeSource t = ((Tok)o).token;
- return Integer.toString(t.addAttribute(OffsetAttribute.class).startOffset()) + ',' + t.addAttribute(OffsetAttribute.class).endOffset() ;
- }
- }
- ,true
- ,verbose
- ,null
- );
- }
-
- if (verbose) {
- printRow(out,"payload", arr, new ToStr() {
- public String toStr(Object o) {
- AttributeSource t = ((Tok)o).token;
- Payload p = t.addAttribute(PayloadAttribute.class).getPayload();
- if( null != p ) {
- BigInteger bi = new BigInteger( p.getData() );
- String ret = bi.toString( 16 );
- if (ret.length() % 2 != 0) {
- // Pad with 0
- ret = "0"+ret;
+ for (int att=0; att < reflectionCount; att++) {
+ final ReflectItem item0 = arr[0].get(0).reflected.get(att);
+ final int i = att;
+ printRow(out, item0.key, item0.attClass.getName(), arr, new TokToStr() {
+ public String toStr(Tok t) {
+ final ReflectItem item = t.reflected.get(i);
+ if (item0.attClass != item.attClass || !item0.key.equals(item.key))
+ throw new RuntimeException("Should not happen: attribute types suddenly change at position=" + t.pos);
+ if (item.value instanceof Payload) {
+ final Payload p = (Payload) item.value;
+ return new BytesRef(p.getData()).toString();
+ } else {
+ return (item.value != null) ? item.value.toString() : "";
}
- ret += isPayloadString( p );
- return ret;
}
- return "";
- }
+ },true,verbose, CharTermAttribute.class.isAssignableFrom(item0.attClass) ? match : null);
}
- ,true
- ,verbose
- ,null
- );
}
out.println("
");