Index: lucene/contrib/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java =================================================================== --- lucene/contrib/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java (revision 1060610) +++ lucene/contrib/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java (working copy) @@ -20,6 +20,7 @@ import java.io.Serializable; import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeReflector; import com.ibm.icu.lang.UScript; @@ -77,7 +78,7 @@ } @Override - public String toString() { - return "script=" + getName(); + public void reflectWith(AttributeReflector reflector) { + reflector.reflect(ScriptAttribute.class, "script", getName()); } } Index: lucene/src/java/org/apache/lucene/analysis/Token.java =================================================================== --- lucene/src/java/org/apache/lucene/analysis/Token.java (revision 1060610) +++ lucene/src/java/org/apache/lucene/analysis/Token.java (working copy) @@ -28,6 +28,7 @@ import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeReflector; /** A Token is an occurrence of a term from the text of a field. It consists of @@ -589,6 +590,17 @@ } } + @Override + public void reflectWith(AttributeReflector reflector) { + super.reflectWith(reflector); + reflector.reflect(OffsetAttribute.class, "startOffset", startOffset); + reflector.reflect(OffsetAttribute.class, "endOffset", endOffset); + reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement); + reflector.reflect(PayloadAttribute.class, "payload", payload); + reflector.reflect(FlagsAttribute.class, "flags", flags); + reflector.reflect(TypeAttribute.class, "type", type); + } + /** Convenience factory that returns Token as implementation for the basic * attributes and return the default impl (with "Impl" appended) for all other * attributes. 
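A minimal usage sketch (not part of the patch) of what the Token hunk above enables, assuming the reflectAsString(boolean) helper introduced further down in AttributeImpl.java; the printed string is illustrative only:

import org.apache.lucene.analysis.Token;

public class TokenReflectionDemo {
  public static void main(String[] args) {
    Token t = new Token("foobar", 6, 22);   // term text, start offset, end offset
    t.setPositionIncrement(2);
    // With Token's reflectWith() above, this should print something like:
    // term=foobar,startOffset=6,endOffset=22,positionIncrement=2,payload=null,flags=0,type=word
    System.out.println(t.reflectAsString(false));
  }
}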
Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (revision 1060610) +++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (working copy) @@ -22,6 +22,7 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeReflector; import org.apache.lucene.util.RamUsageEstimator; /** @@ -286,6 +287,11 @@ } @Override + public void reflectWith(AttributeReflector reflector) { + reflector.reflect(CharTermAttribute.class, "term", toString()); + } + + @Override public void copyTo(AttributeImpl target) { if (target instanceof CharTermAttribute) { CharTermAttribute t = (CharTermAttribute) target; Index: lucene/src/java/org/apache/lucene/util/AttributeImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/util/AttributeImpl.java (revision 1060610) +++ lucene/src/java/org/apache/lucene/util/AttributeImpl.java (working copy) @@ -20,7 +20,11 @@ import java.io.Serializable; import java.lang.reflect.Field; import java.lang.reflect.Modifier; +import java.lang.ref.WeakReference; +import java.util.LinkedList; +import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl; // deprecated + /** * Base class for Attributes that can be added to a * {@link org.apache.lucene.util.AttributeSource}. @@ -37,72 +41,134 @@ public abstract void clear(); /** - * The default implementation of this method accesses all declared - * fields of this object and prints the values in the following syntax: + * Returns a string representation of the object. In general, the {@code toString} method + * returns a string that "textually represents" this object. + * + *

WARNING: For backwards compatibility this method is implemented as + * {@code return reflectAsString(false)}. In Lucene 4.0 this default implementation + * will be removed. The reason for this is the problem of + * {@link CharTermAttributeImpl#toString} that must return a string representation + * of the term's char sequence. + * + *

It is recommended to use {@link #reflectAsString} or {@link #reflectWith} + * to get a well-defined output of AttributeImpl's internals. + */ + // TODO: @deprecated remove this method in 4.0 + @Override + public String toString() { + return reflectAsString(false); + } + + /** + * This method returns the current attribute values as a string in the following format + * by calling the {@link #reflectWith(AttributeReflector)} method: + * + *
  • iff {@code prependAttClass=true}: {@code "AttributeClass#key=value,AttributeClass#key=value"} + *
  • iff {@code prependAttClass=false}: {@code "key=value,key=value"} + *
+ * + * @see #reflectWith(AttributeReflector) + * @see #toString() + */ + public final String reflectAsString(final boolean prependAttClass) { + final StringBuilder buffer = new StringBuilder(); + reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + if (buffer.length() > 0) { + buffer.append(','); + } + if (prependAttClass) { + buffer.append(attClass.getName()).append('#'); + } + buffer.append(key).append('=').append((value == null) ? "null" : value); + } + }); + return buffer.toString(); + } + + /** + * @deprecated this will be removed in Lucene 4.0 + */ + @Deprecated + private static final VirtualMethod toStringMethod = + new VirtualMethod(AttributeImpl.class, "toString"); + + /** + * @deprecated this will be removed in Lucene 4.0 + */ + @Deprecated + private boolean assertExternalClass(Class clazz) { + final String name = clazz.getName(); + return (!name.startsWith("org.apache.lucene.") && !name.startsWith("org.apache.solr.")) + || name.equals("org.apache.lucene.util.TestAttributeSource$TestAttributeImpl"); + } + + /** + * This method is for introspection of attributes, it should simply + * add the key/values this attribute holds to the given {@link AttributeReflector}. + * + *

The default implementation calls {@link AttributeReflector#reflect} for all + * non-static fields from the implementing class, using the field name as key + * and the field value as value. The Attribute class is also determined by reflection. + * Please note that the default implementation can only handle single-Attribute + * implementations. + * + *

Custom implementations look like this (e.g. for a combined attribute implementation): *

-   *   public String toString() {
-   *     return "start=" + startOffset + ",end=" + endOffset;
+   *   public void reflectWith(AttributeReflector reflector) {
+   *     reflector.reflect(CharTermAttribute.class, "term", term());
+   *     reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", getPositionIncrement());
    *   }
    * 
- * - * This method may be overridden by subclasses. + * + *

If you implement this method, make sure that for each invocation, the same set of {@link Attribute} + * interfaces and keys are passed to {@link AttributeReflector#reflect} in the same order, but possibly + * different values. So don't automatically exclude e.g. {@code null} properties! + * + * @see #reflectAsString(boolean) */ - @Override - public String toString() { - StringBuilder buffer = new StringBuilder(); - Class clazz = this.getClass(); - Field[] fields = clazz.getDeclaredFields(); + public void reflectWith(AttributeReflector reflector) { + final Class clazz = this.getClass(); + final LinkedList>> interfaces = AttributeSource.getAttributeInterfaces(clazz); + if (interfaces.size() != 1) { + throw new UnsupportedOperationException(clazz.getName() + + " implements more than one Attribute interface, the default reflectWith() implementation cannot handle this."); + } + final Class interf = interfaces.getFirst().get(); + + // TODO: @deprecated sophisticated(TM) backwards + if (!(this instanceof CharTermAttributeImpl) && toStringMethod.isOverriddenAsOf(clazz)) { + assert assertExternalClass(clazz) : "no Lucene/Solr classes should fallback to toString() parsing"; + // this class overrides toString and for backwards compatibility we try to parse the string returned by this method: + for (String part : toString().split(",")) { + final int pos = part.indexOf('='); + if (pos < 0) { + throw new UnsupportedOperationException("The backwards compatibility layer to support reflectWith() " + + "on old AtributeImpls expects the toString() implementation to return a correct format as specified for method reflectAsString(false)"); + } + reflector.reflect(interf, part.substring(0, pos).trim(), part.substring(pos + 1)); + } + return; + } + // end sophisticated(TM) backwards + + final Field[] fields = clazz.getDeclaredFields(); try { for (int i = 0; i < fields.length; i++) { - Field f = fields[i]; + final Field f = fields[i]; if (Modifier.isStatic(f.getModifiers())) continue; f.setAccessible(true); - Object value = f.get(this); - if (buffer.length()>0) { - buffer.append(','); - } - if (value == null) { - buffer.append(f.getName() + "=null"); - } else { - buffer.append(f.getName() + "=" + value); - } + reflector.reflect(interf, f.getName(), f.get(this)); } } catch (IllegalAccessException e) { // this should never happen, because we're just accessing fields // from 'this' throw new RuntimeException(e); } - - return buffer.toString(); } /** - * Subclasses must implement this method and should compute - * a hashCode similar to this: - *

-   *   public int hashCode() {
-   *     int code = startOffset;
-   *     code = code * 31 + endOffset;
-   *     return code;
-   *   }
-   * 
- * - * see also {@link #equals(Object)} - */ - @Override - public abstract int hashCode(); - - /** - * All values used for computation of {@link #hashCode()} - * should be checked here for equality. - * - * see also {@link Object#equals(Object)} - */ - @Override - public abstract boolean equals(Object other); - - /** * Copies the values from this Attribute into the passed-in * target attribute. The target implementation must support all the * Attributes this implementation supports. Index: lucene/src/java/org/apache/lucene/util/AttributeReflector.java =================================================================== --- lucene/src/java/org/apache/lucene/util/AttributeReflector.java (revision 0) +++ lucene/src/java/org/apache/lucene/util/AttributeReflector.java (revision 0) @@ -0,0 +1,30 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * TODO + */ +public interface AttributeReflector { + + /** + * TODO + */ + public void reflect(Class attClass, String key, Object value); + +} Property changes on: lucene\src\java\org\apache\lucene\util\AttributeReflector.java ___________________________________________________________________ Added: svn:keywords + Date Author Id Revision HeadURL Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/util/AttributeSource.java =================================================================== --- lucene/src/java/org/apache/lucene/util/AttributeSource.java (revision 1060610) +++ lucene/src/java/org/apache/lucene/util/AttributeSource.java (working copy) @@ -188,20 +188,9 @@ private static final WeakHashMap,LinkedList>>> knownImplClasses = new WeakHashMap,LinkedList>>>(); - /** Expert: Adds a custom AttributeImpl instance with one or more Attribute interfaces. - *

Please note: It is not guaranteed, that att is added to - * the AttributeSource, because the provided attributes may already exist. - * You should always retrieve the wanted attributes using {@link #getAttribute} after adding - * with this method and cast to your class. - * The recommended way to use custom implementations is using an {@link AttributeFactory}. - *

- */ - public void addAttributeImpl(final AttributeImpl att) { - final Class clazz = att.getClass(); - if (attributeImpls.containsKey(clazz)) return; - LinkedList>> foundInterfaces; + static LinkedList>> getAttributeInterfaces(final Class clazz) { synchronized(knownImplClasses) { - foundInterfaces = knownImplClasses.get(clazz); + LinkedList>> foundInterfaces = knownImplClasses.get(clazz); if (foundInterfaces == null) { // we have a strong reference to the class instance holding all interfaces in the list (parameter "att"), // so all WeakReferences are never evicted by GC @@ -218,7 +207,23 @@ actClazz = actClazz.getSuperclass(); } while (actClazz != null); } + return foundInterfaces; } + } + + /** Expert: Adds a custom AttributeImpl instance with one or more Attribute interfaces. + *

Please note: It is not guaranteed, that att is added to + * the AttributeSource, because the provided attributes may already exist. + * You should always retrieve the wanted attributes using {@link #getAttribute} after adding + * with this method and cast to your class. + * The recommended way to use custom implementations is using an {@link AttributeFactory}. + *

+ */ + public void addAttributeImpl(final AttributeImpl att) { + final Class clazz = att.getClass(); + if (attributeImpls.containsKey(clazz)) return; + final LinkedList>> foundInterfaces = + getAttributeInterfaces(clazz); // add all interfaces of this AttributeImpl to the maps for (WeakReference> curInterfaceRef : foundInterfaces) { @@ -439,9 +444,21 @@ return false; } + /** + * Returns a string representation of the object. In general, the {@code toString} method + * returns a string that "textually represents" this object. + * + *

WARNING: For backwards compatibility this method is implemented as + * in Lucene 2.9/3.0. In Lucene 4.0 this default implementation + * will be removed. + * + *

It is recommended to use {@link #reflectAsString} or {@link #reflectWith} + * to get a well-defined output of AttributeSource's internals. + */ + // TODO: @deprecated remove this method in 4.0 @Override public String toString() { - StringBuilder sb = new StringBuilder().append('('); + final StringBuilder sb = new StringBuilder().append('('); if (hasAttributes()) { if (currentState == null) { computeCurrentState(); } @@ -455,6 +472,53 @@ } /** + * This method returns the current attribute values as a string in the following format + * by calling the {@link #reflectWith(AttributeReflector)} method: + * + *

    + *
  • iff {@code prependAttClass=true}: {@code "AttributeClass#key=value,AttributeClass#key=value"} + *
  • iff {@code prependAttClass=false}: {@code "key=value,key=value"} + *
+ * + * @see #reflectWith(AttributeReflector) + */ + public final String reflectAsString(final boolean prependAttClass) { + final StringBuilder buffer = new StringBuilder(); + reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + if (buffer.length() > 0) { + buffer.append(','); + } + if (prependAttClass) { + buffer.append(attClass.getName()).append('#'); + } + buffer.append(key).append('=').append((value == null) ? "null" : value); + } + }); + return buffer.toString(); + } + + /** + * This method is for introspection of attributes, it should simply + * add the key/values this AttributeSource holds to the given {@link AttributeReflector}. + * + *

This method iterates over all Attribute implementations and calls the + * corresponding {@link AttributeImpl#reflectWith} method.

+ * + * @see AttributeImpl#reflectWith + */ + public final void reflectWith(AttributeReflector reflector) { + if (hasAttributes()) { + if (currentState == null) { + computeCurrentState(); + } + for (State state = currentState; state != null; state = state.next) { + state.attribute.reflectWith(reflector); + } + } + } + + /** * Performs a clone of all {@link AttributeImpl} instances returned in a new * {@code AttributeSource} instance. This method can be used to e.g. create another TokenStream * with exactly the same attributes (using {@link #AttributeSource(AttributeSource)}). Index: lucene/src/test/org/apache/lucene/analysis/TestToken.java =================================================================== --- lucene/src/test/org/apache/lucene/analysis/TestToken.java (revision 1060610) +++ lucene/src/test/org/apache/lucene/analysis/TestToken.java (working copy) @@ -22,8 +22,10 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util._TestUtil; import java.io.StringReader; +import java.util.HashMap; public class TestToken extends LuceneTestCase { @@ -253,4 +255,19 @@ assertTrue("TypeAttribute is not implemented by Token", ts.addAttribute(TypeAttribute.class) instanceof Token); } + + public void testAttributeReflection() throws Exception { + Token t = new Token("foobar", 6, 22, 8); + _TestUtil.assertAttributeReflection(t, + new HashMap() {{ + put(CharTermAttribute.class.getName() + "#term", "foobar"); + put(OffsetAttribute.class.getName() + "#startOffset", 6); + put(OffsetAttribute.class.getName() + "#endOffset", 22); + put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 1); + put(PayloadAttribute.class.getName() + "#payload", null); + put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE); + put(FlagsAttribute.class.getName() + "#flags", 8); + }}); + } + } Index: lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java =================================================================== --- lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java (revision 1060610) +++ lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java (working copy) @@ -18,7 +18,9 @@ */ import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; import java.nio.CharBuffer; +import java.util.Collections; import java.util.Formatter; import java.util.Locale; import java.util.regex.Pattern; @@ -125,6 +127,13 @@ assertNotSame(buf, copy.buffer()); } + public void testAttributeReflection() throws Exception { + CharTermAttributeImpl t = new CharTermAttributeImpl(); + t.append("foobar"); + _TestUtil.assertAttributeReflection(t, + Collections.singletonMap(CharTermAttribute.class.getName() + "#term", "foobar")); + } + public void testCharSequenceInterface() { final String s = "0123456789"; final CharTermAttributeImpl t = new CharTermAttributeImpl(); Index: lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpls.java =================================================================== --- lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpls.java (revision 1060610) +++ lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpls.java (working copy) @@ -21,7 +21,11 @@ import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.LuceneTestCase; import 
org.apache.lucene.util.AttributeSource.AttributeFactory; +import org.apache.lucene.util._TestUtil; +import java.util.Collections; +import java.util.HashMap; + @Deprecated public class TestSimpleAttributeImpls extends LuceneTestCase { @@ -40,6 +44,9 @@ att.clear(); assertEquals(0, att.getFlags()); + + _TestUtil.assertAttributeReflection(att, + Collections.singletonMap(FlagsAttribute.class.getName() + "#flags", att.getFlags())); } public void testPositionIncrementAttribute() throws Exception { @@ -57,6 +64,9 @@ att.clear(); assertEquals(1, att.getPositionIncrement()); + + _TestUtil.assertAttributeReflection(att, + Collections.singletonMap(PositionIncrementAttribute.class.getName() + "#positionIncrement", att.getPositionIncrement())); } public void testTypeAttribute() throws Exception { @@ -74,6 +84,9 @@ att.clear(); assertEquals(TypeAttribute.DEFAULT_TYPE, att.type()); + + _TestUtil.assertAttributeReflection(att, + Collections.singletonMap(TypeAttribute.class.getName() + "#type", att.type())); } public void testPayloadAttribute() throws Exception { @@ -82,6 +95,9 @@ Payload pl = new Payload(new byte[]{1,2,3,4}); att.setPayload(pl); + + _TestUtil.assertAttributeReflection(att, + Collections.singletonMap(PayloadAttribute.class.getName() + "#payload", pl)); PayloadAttributeImpl att2 = (PayloadAttributeImpl) assertCloneIsEqual(att); assertEquals(pl, att2.getPayload()); @@ -102,6 +118,12 @@ att.setOffset(12, 34); // no string test here, because order unknown + + _TestUtil.assertAttributeReflection(att, + new HashMap() {{ + put(OffsetAttribute.class.getName() + "#startOffset", 12); + put(OffsetAttribute.class.getName() + "#endOffset", 34); + }}); OffsetAttributeImpl att2 = (OffsetAttributeImpl) assertCloneIsEqual(att); assertEquals(12, att2.startOffset()); @@ -133,6 +155,9 @@ att.copyTo(assertCloneIsEqual); assertTrue(assertCloneIsEqual.isKeyword()); assertTrue(att.isKeyword()); + + _TestUtil.assertAttributeReflection(att, + Collections.singletonMap(KeywordAttribute.class.getName() + "#keyword", att.isKeyword())); } public static final AttributeImpl assertCloneIsEqual(AttributeImpl att) { Index: lucene/src/test/org/apache/lucene/util/_TestUtil.java =================================================================== --- lucene/src/test/org/apache/lucene/util/_TestUtil.java (revision 1060610) +++ lucene/src/test/org/apache/lucene/util/_TestUtil.java (working copy) @@ -22,7 +22,11 @@ import java.io.IOException; import java.io.PrintStream; import java.util.Random; +import java.util.Map; +import java.util.HashMap; +import org.junit.Assert; + import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.IndexWriter; @@ -241,4 +245,17 @@ ((ConcurrentMergeScheduler) ms).setMaxMergeCount(3); } } + + /** Checks some basic behaviour of an AttributeImpl + * @param reflectedValues contains a map with "AttributeClass#key" as values + */ + public static void assertAttributeReflection(final AttributeImpl att, Map reflectedValues) { + final Map map = new HashMap(); + att.reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + map.put(attClass.getName() + '#' + key, value); + } + }); + Assert.assertEquals("Reflection does not produce same map", reflectedValues, map); + } } Index: lucene/src/test/org/apache/lucene/util/TestAttributeSource.java =================================================================== --- lucene/src/test/org/apache/lucene/util/TestAttributeSource.java (revision 1060610) 
+++ lucene/src/test/org/apache/lucene/util/TestAttributeSource.java (working copy) @@ -21,6 +21,8 @@ import org.apache.lucene.analysis.tokenattributes.*; import java.util.Iterator; +import java.util.HashMap; +import java.util.Map; public class TestAttributeSource extends LuceneTestCase { @@ -175,4 +177,70 @@ fail("Should throw IllegalArgumentException"); } catch (IllegalArgumentException iae) {} } + + // this class is included in external class check, so no assertion errors occur + @Deprecated + static class TestAttributeImpl extends AttributeImpl implements FlagsAttribute { + + private int flags = 0; + + public int getFlags() { return flags; } + public void setFlags(int flags) { this.flags = flags; } + + @Override + public void clear() { flags = 0; } + + @Override + public void copyTo(AttributeImpl target) { + FlagsAttribute t = (FlagsAttribute) target; + t.setFlags(flags); + } + + @Override + public String toString() { + return "foo=bar,moo=mae"; + } + + } + + // this class is excluded in external class check, so assertion on calling reflectWith should occur + @Deprecated + static class TestAttributeImpl2 extends TestAttributeImpl {} + + @Deprecated + public void testReflectionOfToString() throws Exception { + final AttributeSource src = new AttributeSource(); + final AttributeImpl att = new TestAttributeImpl(); + src.addAttributeImpl(att); + + assertSame("FlagsAttribute is not implemented by same instance of TestAttributeImpl", + att, src.addAttribute(FlagsAttribute.class)); + + final Map map = new HashMap(); + final AttributeReflector reflector = new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + assertSame(FlagsAttribute.class, attClass); + map.put(key, value); + } + }; + att.reflectWith(reflector); + assertEquals(2, map.size()); + assertEquals("bar", map.get("foo")); + assertEquals("mae", map.get("moo")); + + map.clear(); + src.reflectWith(reflector); + assertEquals(2, map.size()); + assertEquals("bar", map.get("foo")); + assertEquals("mae", map.get("moo")); + + map.clear(); + try { + new TestAttributeImpl2().reflectWith(reflector); + fail("TestAttributeImpl2 should fail assertion on toString() parsing"); + } catch (AssertionError e) { + // pass + } + } + } Index: solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java =================================================================== --- solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java (revision 1060610) +++ solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java (working copy) @@ -23,6 +23,8 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeReflector; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.SolrParams; @@ -45,6 +47,9 @@ import java.io.Reader; import java.io.StringReader; import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; /** * @@ -132,25 +137,33 @@ } } } + + // a static mapping of the reflected attribute keys to the names used in Solr 1.3/1.4 + static Map ATTRIBUTE_MAPPING = Collections.unmodifiableMap(new HashMap() {{ + put(CharTermAttribute.class.getName() + "#term", "value"); + put(OffsetAttribute.class.getName() + "#startOffset", "start"); + 
put(OffsetAttribute.class.getName() + "#endOffset", "end"); + put(PositionIncrementAttribute.class.getName() + "#positionIncrement", "posInc"); + put(TypeAttribute.class.getName() + "#type", "type"); + }}); static NamedList> getTokens(TokenStream tstream) throws IOException { // outer is namedList since order of tokens is important NamedList> tokens = new NamedList>(); - // TODO: support custom attributes - final CharTermAttribute termAtt = tstream.getAttribute(CharTermAttribute.class); - final OffsetAttribute offsetAtt = tstream.addAttribute(OffsetAttribute.class); - final TypeAttribute typeAtt = tstream.addAttribute(TypeAttribute.class); - final PositionIncrementAttribute posIncAtt = tstream.addAttribute(PositionIncrementAttribute.class); while (tstream.incrementToken()) { - NamedList token = new SimpleOrderedMap(); + final NamedList token = new SimpleOrderedMap(); tokens.add("token", token); - token.add("value", termAtt.toString()); - token.add("start", offsetAtt.startOffset()); - token.add("end", offsetAtt.endOffset()); - token.add("posInc", posIncAtt.getPositionIncrement()); - token.add("type", typeAtt.type()); - //TODO: handle payloads + tstream.reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + String k = attClass.getName() + '#' + key; + // map keys for "standard attributes": + if (ATTRIBUTE_MAPPING.containsKey(k)) { + k = ATTRIBUTE_MAPPING.get(k); + } + token.add(k, value); + } + }); } return tokens; } Index: solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java =================================================================== --- solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java (revision 1060610) +++ solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java (working copy) @@ -20,14 +20,16 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CharReader; import org.apache.lucene.analysis.CharStream; -import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.index.Payload; +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.AttributeReflector; +import org.apache.lucene.util.SorterTemplate; import org.apache.solr.analysis.CharFilterFactory; import org.apache.solr.analysis.TokenFilterFactory; import org.apache.solr.analysis.TokenizerChain; @@ -42,6 +44,7 @@ import java.io.IOException; import java.io.StringReader; import java.util.*; +import java.math.BigInteger; /** * A base class for all analysis request handlers. 
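A minimal sketch (not part of the patch) of the reflectWith-plus-key-mapping pattern that the reworked getTokens() above relies on, collected here into a plain LinkedHashMap instead of Solr's NamedList so it stays self-contained; the class name and attribute values are made up:

import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.AttributeSource;

public class ReflectTokenSketch {
  public static void main(String[] args) {
    AttributeSource token = new AttributeSource();
    token.addAttribute(CharTermAttribute.class).append("hello");
    token.addAttribute(OffsetAttribute.class).setOffset(0, 5);

    final Map<String, Object> entries = new LinkedHashMap<String, Object>();
    token.reflectWith(new AttributeReflector() {
      public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
        // same composite key the handler builds before mapping to the Solr 1.4 names:
        entries.put(attClass.getName() + '#' + key, value);
      }
    });
    // e.g. {...CharTermAttribute#term=hello, ...OffsetAttribute#startOffset=0, ...OffsetAttribute#endOffset=5}
    System.out.println(entries);
  }
}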
@@ -109,7 +112,7 @@ } TokenStream tokenStream = tfac.create(tokenizerChain.charStream(new StringReader(value))); - List tokens = analyzeTokenStream(tokenStream); + List tokens = analyzeTokenStream(tokenStream); namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context)); @@ -117,7 +120,7 @@ for (TokenFilterFactory tokenFilterFactory : filtfacs) { tokenStream = tokenFilterFactory.create(listBasedTokenStream); - List tokenList = analyzeTokenStream(tokenStream); + List tokenList = analyzeTokenStream(tokenStream); namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokenList, context)); listBasedTokenStream = new ListBasedTokenStream(tokenList); } @@ -132,40 +135,54 @@ * @param analyzer The analyzer to use. * * @return The produces token list. + * @deprecated This method is no longer used by Solr + * @see #getQueryTokenSet */ - protected List analyzeValue(String value, Analyzer analyzer) { + @Deprecated + protected List analyzeValue(String value, Analyzer analyzer) { TokenStream tokenStream = analyzer.tokenStream("", new StringReader(value)); return analyzeTokenStream(tokenStream); } /** + * Analyzes the given text using the given analyzer and returns the produced tokens. + * + * @param query The query to analyze. + * @param analyzer The analyzer to use. + */ + protected Set getQueryTokenSet(String query, Analyzer analyzer) { + final Set tokens = new HashSet(); + final TokenStream tokenStream = analyzer.tokenStream("", new StringReader(query)); + final CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class); + try { + tokenStream.reset(); + while (tokenStream.incrementToken()) { + tokens.add(termAtt.toString()); + } + } catch (IOException ioe) { + throw new RuntimeException("Error occured while iterating over tokenstream", ioe); + } + return tokens; + } + + /** * Analyzes the given TokenStream, collecting the Tokens it produces. 
* * @param tokenStream TokenStream to analyze * * @return List of tokens produced from the TokenStream */ - private List analyzeTokenStream(TokenStream tokenStream) { - List tokens = new ArrayList(); - - // TODO change this API to support custom attributes - final CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class); - final OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class); - final TypeAttribute typeAtt = tokenStream.addAttribute(TypeAttribute.class); - final PositionIncrementAttribute posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class); - final FlagsAttribute flagsAtt = tokenStream.addAttribute(FlagsAttribute.class); - final PayloadAttribute payloadAtt = tokenStream.addAttribute(PayloadAttribute.class); - + private List analyzeTokenStream(TokenStream tokenStream) { + List tokens = new ArrayList(); + // for backwards compatibility, add all "common" attributes + tokenStream.addAttribute(CharTermAttribute.class); + tokenStream.addAttribute(PositionIncrementAttribute.class); + tokenStream.addAttribute(OffsetAttribute.class); + tokenStream.addAttribute(TypeAttribute.class); try { + tokenStream.reset(); while (tokenStream.incrementToken()) { - Token token = new Token(); - token.setEmpty().append(termAtt); - token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); - token.setType(typeAtt.type()); - token.setFlags(flagsAtt.getFlags()); - token.setPayload(payloadAtt.getPayload()); - token.setPositionIncrement(posIncAtt.getPositionIncrement()); - tokens.add((Token) token.clone()); + tokens.add(tokenStream.cloneAttributes()); } } catch (IOException ioe) { throw new RuntimeException("Error occured while iterating over tokenstream", ioe); @@ -174,6 +191,13 @@ return tokens; } + // a static mapping of the reflected attribute keys to the names used in Solr 1.4 + static Map ATTRIBUTE_MAPPING = Collections.unmodifiableMap(new HashMap() {{ + put(OffsetAttribute.class.getName() + "#startOffset", "start"); + put(OffsetAttribute.class.getName() + "#endOffset", "end"); + put(TypeAttribute.class.getName() + "#type", "type"); + }}); + /** * Converts the list of Tokens to a list of NamedLists representing the tokens. 
* @@ -182,42 +206,96 @@ * * @return List of NamedLists containing the relevant information taken from the tokens */ - private List convertTokensToNamedLists(List tokens, AnalysisContext context) { - List tokensNamedLists = new ArrayList(); + private List convertTokensToNamedLists(final List tokens, AnalysisContext context) { + final List tokensNamedLists = new ArrayList(); - Collections.sort(tokens, new Comparator() { - public int compare(Token o1, Token o2) { - return o1.endOffset() - o2.endOffset(); + final int[] positions = new int[tokens.size()]; + int position = 0; + for (int i = 0, c = tokens.size(); i < c; i++) { + AttributeSource token = tokens.get(i); + position += token.addAttribute(PositionIncrementAttribute.class).getPositionIncrement(); + positions[i] = position; + } + + // sort the tokens by absoulte position + new SorterTemplate() { + @Override + protected void swap(int i, int j) { + Collections.swap(tokens, i, j); } - }); + + @Override + protected int compare(int i, int j) { + return positions[i] - positions[j]; + } - int position = 0; + @Override + protected void setPivot(int i) { + pivot = positions[i]; + } + + @Override + protected int comparePivot(int j) { + return pivot - positions[j]; + } + + private int pivot; + }.mergeSort(0, tokens.size() - 1); FieldType fieldType = context.getFieldType(); - for (Token token : tokens) { - NamedList tokenNamedList = new SimpleOrderedMap(); + for (int i = 0, c = tokens.size(); i < c; i++) { + AttributeSource token = tokens.get(i); + final NamedList tokenNamedList = new SimpleOrderedMap(); + final String rawText = token.addAttribute(CharTermAttribute.class).toString(); - String text = fieldType.indexedToReadable(token.toString()); + String text = fieldType.indexedToReadable(rawText); tokenNamedList.add("text", text); - if (!text.equals(token.toString())) { - tokenNamedList.add("raw_text", token.toString()); + if (!text.equals(rawText)) { + tokenNamedList.add("raw_text", rawText); } - tokenNamedList.add("type", token.type()); - tokenNamedList.add("start", token.startOffset()); - tokenNamedList.add("end", token.endOffset()); - position += token.getPositionIncrement(); - tokenNamedList.add("position", position); - - if (context.getTermsToMatch().contains(token.toString())) { + if (context.getTermsToMatch().contains(rawText)) { tokenNamedList.add("match", true); } - if (token.getPayload() != null) { - tokenNamedList.add("payload", token.getPayload()); - } + tokenNamedList.add("position", positions[i]); + token.reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + // leave out position and term + if (CharTermAttribute.class.isAssignableFrom(attClass)) + return; + if (PositionIncrementAttribute.class.isAssignableFrom(attClass)) + return; + + String k = attClass.getName() + '#' + key; + + // map keys for "standard attributes": + if (ATTRIBUTE_MAPPING.containsKey(k)) { + k = ATTRIBUTE_MAPPING.get(k); + } + + // TODO: special handling for payloads - move this to ResponseWriter? 
+ if (value instanceof Payload) { + Payload p = (Payload) value; + if( null != p ) { + BigInteger bi = new BigInteger( p.getData() ); + String ret = bi.toString( 16 ); + if (ret.length() % 2 != 0) { + // Pad with 0 + ret = "0"+ret; + } + value = ret; + } else { + value = null; + } + } + + tokenNamedList.add(k, value); + } + }); + tokensNamedLists.add(tokenNamedList); } @@ -250,38 +328,27 @@ */ // TODO refactor to support custom attributes protected final static class ListBasedTokenStream extends TokenStream { - private final List tokens; - private Iterator tokenIterator; + private final List tokens; + private Iterator tokenIterator; - private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); - private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); - private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class); - private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); - private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); /** * Creates a new ListBasedTokenStream which uses the given tokens as its token source. * * @param tokens Source of tokens to be used */ - ListBasedTokenStream(List tokens) { + ListBasedTokenStream(List tokens) { this.tokens = tokens; tokenIterator = tokens.iterator(); } - /** - * {@inheritDoc} - */ @Override public boolean incrementToken() throws IOException { if (tokenIterator.hasNext()) { - Token next = tokenIterator.next(); - termAtt.copyBuffer(next.buffer(), 0, next.length()); - typeAtt.setType(next.type()); - offsetAtt.setOffset(next.startOffset(), next.endOffset()); - flagsAtt.setFlags(next.getFlags()); - payloadAtt.setPayload(next.getPayload()); - posIncAtt.setPositionIncrement(next.getPositionIncrement()); + AttributeSource next = tokenIterator.next(); + Iterator> atts = next.getAttributeClassesIterator(); + while (atts.hasNext()) // make sure all att impls in the token exist here + addAttribute(atts.next()); + next.copyTo(this); return true; } else { return false; Index: solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java =================================================================== --- solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java (revision 1060610) +++ solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java (working copy) @@ -19,7 +19,8 @@ import org.apache.commons.io.IOUtils; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.util.AttributeSource; import org.apache.solr.client.solrj.request.DocumentAnalysisRequest; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; @@ -216,21 +217,20 @@ FieldType fieldType = schema.getFieldType(name); - Set termsToMatch = new HashSet(); - if (request.getQuery() != null && request.isShowMatch()) { - try { - List tokens = analyzeValue(request.getQuery(), fieldType.getQueryAnalyzer()); - for (Token token : tokens) { - termsToMatch.add(token.toString()); - } - } catch (Exception e) { - // ignore analysis exceptions since we are applying arbitrary text to all fields - } + final String queryValue = request.getQuery(); + Set termsToMatch; + try { + termsToMatch = (queryValue != null && request.isShowMatch()) + ? 
getQueryTokenSet(queryValue, fieldType.getQueryAnalyzer()) + : Collections.emptySet(); + } catch (Exception e) { + // ignore analysis exceptions since we are applying arbitrary text to all fields + termsToMatch = Collections.emptySet(); } if (request.getQuery() != null) { try { - AnalysisContext analysisContext = new AnalysisContext(fieldType, fieldType.getQueryAnalyzer(), Collections.EMPTY_SET); + AnalysisContext analysisContext = new AnalysisContext(fieldType, fieldType.getQueryAnalyzer(), Collections.emptySet()); NamedList> tokens = analyzeValue(request.getQuery(), analysisContext); fieldTokens.add("query", tokens); } catch (Exception e) { Index: solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java =================================================================== --- solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java (revision 1060610) +++ solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java (working copy) @@ -17,7 +17,8 @@ package org.apache.solr.handler; -import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.util.AttributeSource; import org.apache.solr.client.solrj.request.FieldAnalysisRequest; import org.apache.solr.common.params.AnalysisParams; import org.apache.solr.common.params.CommonParams; @@ -30,10 +31,7 @@ import org.apache.solr.schema.IndexSchema; import org.apache.commons.io.IOUtils; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Set; +import java.util.*; import java.io.Reader; import java.io.IOException; @@ -222,14 +220,10 @@ */ private NamedList analyzeValues(FieldAnalysisRequest analysisRequest, FieldType fieldType, String fieldName) { - Set termsToMatch = new HashSet(); - String queryValue = analysisRequest.getQuery(); - if (queryValue != null && analysisRequest.isShowMatch()) { - List tokens = analyzeValue(queryValue, fieldType.getQueryAnalyzer()); - for (Token token : tokens) { - termsToMatch.add(token.toString()); - } - } + final String queryValue = analysisRequest.getQuery(); + final Set termsToMatch = (queryValue != null && analysisRequest.isShowMatch()) + ? 
getQueryTokenSet(queryValue, fieldType.getQueryAnalyzer()) + : Collections.emptySet(); NamedList analyzeResults = new SimpleOrderedMap(); if (analysisRequest.getFieldValue() != null) { Index: solr/src/webapp/web/admin/analysis.jsp =================================================================== --- solr/src/webapp/web/admin/analysis.jsp (revision 1060610) +++ solr/src/webapp/web/admin/analysis.jsp (working copy) @@ -23,6 +23,7 @@ org.apache.lucene.analysis.CharReader, org.apache.lucene.analysis.CharStream, org.apache.lucene.analysis.tokenattributes.*, + org.apache.lucene.util.AttributeReflector, org.apache.solr.analysis.CharFilterFactory, org.apache.solr.analysis.TokenFilterFactory, org.apache.solr.analysis.TokenizerChain, @@ -70,19 +71,19 @@ @@ -114,7 +115,7 @@ @@ -147,24 +148,26 @@ } if (field!=null) { - HashSet matches = null; + HashSet matches = null; if (qval!="" && highlight) { Reader reader = new StringReader(qval); Analyzer analyzer = field.getType().getQueryAnalyzer(); TokenStream tstream = analyzer.reusableTokenStream(field.getName(),reader); + CharTermAttribute termAtt = tstream.addAttribute(CharTermAttribute.class); tstream.reset(); - List tokens = getTokens(tstream); - matches = new HashSet(); - for (AttributeSource t : tokens) { matches.add( new Tok(t,0)); } + matches = new HashSet(); + while (tstream.incrementToken()) { + matches.add(termAtt.toString()); + } } if (val!="") { out.println("

Index Analyzer

"); - doAnalyzer(out, field, val, false, verbose,matches); + doAnalyzer(out, field, val, false, verbose, matches); } if (qval!="") { out.println("

Query Analyzer

"); - doAnalyzer(out, field, qval, true, qverbose,null); + doAnalyzer(out, field, qval, true, qverbose, null); } } @@ -176,7 +179,7 @@ <%! - private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set match) throws Exception { + private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set match) throws Exception { FieldType ft = field.getType(); Analyzer analyzer = queryAnalyser ? @@ -239,7 +242,7 @@ tstream.reset(); List tokens = getTokens(tstream); if (verbose) { - writeHeader(out, analyzer.getClass(), new HashMap()); + writeHeader(out, analyzer.getClass(), Collections.EMPTY_MAP); } writeTokens(out, tokens, ft, verbose, match); } @@ -248,45 +251,51 @@ static List getTokens(TokenStream tstream) throws IOException { List tokens = new ArrayList(); - - while (true) { - if (!tstream.incrementToken()) - break; - else { - tokens.add(tstream.cloneAttributes()); - } + tstream.reset(); + while (tstream.incrementToken()) { + tokens.add(tstream.cloneAttributes()); } return tokens; } - + private static class ReflectItem { + final Class attClass; + final String key; + final Object value; + + ReflectItem(Class attClass, String key, Object value) { + this.attClass = attClass; + this.key = key; + this.value = value; + } + } + private static class Tok { - AttributeSource token; - int pos; + final String term; + final int pos; + final List reflected = new ArrayList(); + Tok(AttributeSource token, int pos) { - this.token=token; - this.pos=pos; + this.term = token.addAttribute(CharTermAttribute.class).toString(); + this.pos = pos; + token.reflectWith(new AttributeReflector() { + public void reflect(Class attClass, String key, Object value) { + // leave out position and term + if (CharTermAttribute.class.isAssignableFrom(attClass)) + return; + if (PositionIncrementAttribute.class.isAssignableFrom(attClass)) + return; + reflected.add(new ReflectItem(attClass, key, value)); + } + }); } - - public boolean equals(Object o) { - return ((Tok)o).token.toString().equals(token.toString()); - } - public int hashCode() { - return token.toString().hashCode(); - } - public String toString() { - return token.toString(); - } - public String toPrintableString() { - return token.addAttribute(CharTermAttribute.class).toString(); - } } - private static interface ToStr { - public String toStr(Object o); + private static interface TokToStr { + public String toStr(Tok o); } - private static void printRow(JspWriter out, String header, List[] arrLst, ToStr converter, boolean multival, boolean verbose, Set match) throws IOException { + private static void printRow(JspWriter out, String header, String headerTitle, List[] arrLst, TokToStr converter, boolean multival, boolean verbose, Set match) throws IOException { // find the maximum number of terms for any position int maxSz=1; if (multival) { @@ -300,7 +309,13 @@ out.println("
"); if (idx==0 && verbose) { if (header != null) { - out.print(""); } @@ -309,7 +324,7 @@ for (int posIndex=0; posIndex lst = arrLst[posIndex]; if (lst.size() <= idx) continue; - if (match!=null && match.contains(lst.get(idx))) { + if (match!=null && match.contains(lst.get(idx).term)) { out.print("
- Field + Field - +
- Field value (Index) + Field value (Index)
verbose output >
- +
- Field value (Query) + Field value (Query)
verbose output >
- +
- +
"); + out.print(""); XML.escapeCharData(header,out); out.println(" args) throws IOException { out.print("

"); @@ -351,16 +368,17 @@ // readable, raw, pos, type, start/end - static void writeTokens(JspWriter out, List tokens, final FieldType ft, boolean verbose, Set match) throws IOException { + static void writeTokens(JspWriter out, List tokens, final FieldType ft, boolean verbose, Set match) throws IOException { // Use a map to tell what tokens are in what positions // because some tokenizers/filters may do funky stuff with // very large increments, or negative increments. HashMap> map = new HashMap>(); boolean needRaw=false; - int pos=0; + int pos=0, reflectionCount = -1; for (AttributeSource t : tokens) { - if (!t.toString().equals(ft.indexedToReadable(t.toString()))) { + String text = t.addAttribute(CharTermAttribute.class).toString(); + if (!text.equals(ft.indexedToReadable(text))) { needRaw=true; } @@ -371,117 +389,78 @@ map.put(pos,lst); } Tok tok = new Tok(t,pos); + // sanity check + if (reflectionCount < 0) { + reflectionCount = tok.reflected.size(); + } else { + if (reflectionCount != tok.reflected.size()) + throw new RuntimeException("Should not happen: Number of reflected entries differs for position=" + pos); + } lst.add(tok); } List[] arr = (List[])map.values().toArray(new ArrayList[map.size()]); - /* Jetty 6.1.3 miscompiles this generics version... - Arrays.sort(arr, new Comparator>() { - public int compare(List toks, List toks1) { - return toks.get(0).pos - toks1.get(0).pos; - } - } - */ - + // Jetty 6.1.3 miscompiles a generics-enabled version..., without generics: Arrays.sort(arr, new Comparator() { public int compare(Object toks, Object toks1) { return ((List)toks).get(0).pos - ((List)toks1).get(0).pos; } - } + }); - - ); - out.println(""); if (verbose) { - printRow(out,"term position", arr, new ToStr() { - public String toStr(Object o) { - return Integer.toString(((Tok)o).pos); + printRow(out, "position", "calculated from " + PositionIncrementAttribute.class.getName(), arr, new TokToStr() { + public String toStr(Tok t) { + return Integer.toString(t.pos); } - } - ,false - ,verbose - ,null); + },false,verbose,null); } - - printRow(out,"term text", arr, new ToStr() { - public String toStr(Object o) { - return ft.indexedToReadable( ((Tok)o).toPrintableString() ); + printRow(out, "term", CharTermAttribute.class.getName(), arr, new TokToStr() { + public String toStr(Tok t) { + return ft.indexedToReadable(t.term); } - } - ,true - ,verbose - ,match - ); + },true,verbose,match); - if (needRaw) { - printRow(out,"raw text", arr, new ToStr() { - public String toStr(Object o) { + if (needRaw && verbose) { + printRow(out, "raw term", CharTermAttribute.class.getName(), arr, new TokToStr() { + public String toStr(Tok t) { // page is UTF-8, so anything goes. 
- return ((Tok)o).toPrintableString(); + return t.term; } - } - ,true - ,verbose - ,match - ); + },true,verbose,match); } if (verbose) { - printRow(out,"term type", arr, new ToStr() { - public String toStr(Object o) { - String tt = ((Tok)o).token.addAttribute(TypeAttribute.class).type(); - if (tt == null) { - return "null"; - } else { - return tt; - } - } - } - ,true - ,verbose, - null - ); - } - - if (verbose) { - printRow(out,"source start,end", arr, new ToStr() { - public String toStr(Object o) { - AttributeSource t = ((Tok)o).token; - return Integer.toString(t.addAttribute(OffsetAttribute.class).startOffset()) + ',' + t.addAttribute(OffsetAttribute.class).endOffset() ; - } - } - ,true - ,verbose - ,null - ); - } - - if (verbose) { - printRow(out,"payload", arr, new ToStr() { - public String toStr(Object o) { - AttributeSource t = ((Tok)o).token; - Payload p = t.addAttribute(PayloadAttribute.class).getPayload(); - if( null != p ) { - BigInteger bi = new BigInteger( p.getData() ); - String ret = bi.toString( 16 ); - if (ret.length() % 2 != 0) { - // Pad with 0 - ret = "0"+ret; + for (int att=0; att < reflectionCount; att++) { + final ReflectItem item0 = arr[0].get(0).reflected.get(att); + final int i = att; + printRow(out, item0.key, item0.attClass.getName(), arr, new TokToStr() { + public String toStr(Tok t) { + final ReflectItem item = t.reflected.get(i); + if (item0.attClass != item.attClass || !item0.key.equals(item.key)) + throw new RuntimeException("Should not happen: attribute types suddenly change at position=" + t.pos); + if (item.value instanceof Payload) { + Payload p = (Payload) item.value; + if( null != p ) { + BigInteger bi = new BigInteger( p.getData() ); + String ret = bi.toString( 16 ); + if (ret.length() % 2 != 0) { + // Pad with 0 + ret = "0"+ret; + } + //TODO maybe fix: ret += isPayloadString(p); + return ret; + } + return ""; + } else { + return (item.value != null) ? item.value.toString() : ""; } - ret += isPayloadString( p ); - return ret; } - return ""; - } + },true,verbose,null); } - ,true - ,verbose - ,null - ); } out.println("
");