Index: common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
===================================================================
--- common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java (revision 821307)
+++ common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java (arbetskopia)
@@ -18,6 +18,7 @@
*/
import java.io.IOException;
+import java.io.StringReader;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
@@ -28,6 +29,7 @@
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
import org.apache.lucene.analysis.miscellaneous.PrefixAndSuffixAwareTokenFilter;
import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream;
@@ -44,7 +46,7 @@
"testBehavingAsShingleFilter", "testMatrix"
})));
}
-
+
public void testBehavingAsShingleFilter() throws IOException {
ShingleMatrixFilter.defaultSettingsCodec = null;
Index: common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java
===================================================================
--- common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java (revision 821307)
+++ common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java (arbetskopia)
@@ -112,7 +112,7 @@
* See {@link #calculateShingleWeight(org.apache.lucene.analysis.Token, java.util.List, int, java.util.List, java.util.List)}.
*
* NOTE: This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
- * the ones located in org.apache.lucene.analysis.tokenattributes.
+ * the ones located in org.apache.lucene.analysis.tokenattributes.
*/
public class ShingleMatrixFilter extends TokenStream {
@@ -206,7 +206,7 @@
private TypeAttribute in_typeAtt;
private FlagsAttribute in_flagsAtt;
-
+
/**
* Creates a shingle filter based on a user defined matrix.
*
@@ -237,7 +237,7 @@
// set the input to be an empty token stream, we already have the data.
this.input = new EmptyTokenStream();
-
+
in_termAtt = input.addAttribute(TermAttribute.class);
in_posIncrAtt = input.addAttribute(PositionIncrementAttribute.class);
in_payloadAtt = input.addAttribute(PayloadAttribute.class);
@@ -316,7 +316,7 @@
offsetAtt = addAttribute(OffsetAttribute.class);
typeAtt = addAttribute(TypeAttribute.class);
flagsAtt = addAttribute(FlagsAttribute.class);
-
+
in_termAtt = input.addAttribute(TermAttribute.class);
in_posIncrAtt = input.addAttribute(PositionIncrementAttribute.class);
in_payloadAtt = input.addAttribute(PayloadAttribute.class);
@@ -328,12 +328,12 @@
// internal filter instance variables
/** iterator over the current matrix row permutations */
- private Iterator permutations;
+ private Iterator<Matrix.Column.Row[]> permutations;
/** the current permutation of tokens used to produce shingles */
- private List currentPermuationTokens;
+ private List<Token> currentPermuationTokens;
/** index to what row a token in currentShingleTokens represents*/
- private List currentPermutationRows;
+ private List<Matrix.Column.Row> currentPermutationRows;
private int currentPermutationTokensStartOffset;
private int currentShingleLength;
@@ -342,7 +342,7 @@
* a set containing shingles that has been the result of a call to next(Token),
* used to avoid producing the same shingle more than once.
*/
- private Set shinglesSeen = new HashSet();
+ private Set<List<Token>> shinglesSeen = new HashSet<List<Token>>();
public void reset() throws IOException {
@@ -352,9 +352,9 @@
}
private Matrix matrix;
-
+
private Token reusableToken = new Token();
-
+
public final boolean incrementToken() throws IOException {
if (matrix == null) {
matrix = new Matrix();
@@ -372,7 +372,7 @@
token = produceNextToken(reusableToken);
} while (token == request_next_token);
if (token == null) return false;
-
+
termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
posIncrAtt.setPositionIncrement(token.getPositionIncrement());
flagsAtt.setFlags(token.getFlags());
@@ -381,7 +381,7 @@
payloadAtt.setPayload(token.getPayload());
return true;
}
-
+
private Token getNextInputToken(Token token) throws IOException {
if (!input.incrementToken()) return null;
token.setTermBuffer(in_termAtt.termBuffer(), 0, in_termAtt.termLength());
@@ -404,7 +404,7 @@
public final Token next() throws java.io.IOException {
return super.next();
}
-
+
private static final Token request_next_token = new Token();
/**
@@ -428,16 +428,16 @@
if (ignoringSinglePrefixOrSuffixShingle
&& currentShingleLength == 1
- && (((Matrix.Column.Row) currentPermutationRows.get(currentPermutationTokensStartOffset)).getColumn().isFirst() || ((Matrix.Column.Row) currentPermutationRows.get(currentPermutationTokensStartOffset)).getColumn().isLast())) {
+ && ((currentPermutationRows.get(currentPermutationTokensStartOffset)).getColumn().isFirst() || (currentPermutationRows.get(currentPermutationTokensStartOffset)).getColumn().isLast())) {
return next(reusableToken);
}
int termLength = 0;
- List shingle = new ArrayList();
+ List<Token> shingle = new ArrayList<Token>(currentShingleLength);
for (int i = 0; i < currentShingleLength; i++) {
- Token shingleToken = (Token) currentPermuationTokens.get(i + currentPermutationTokensStartOffset);
+ Token shingleToken = currentPermuationTokens.get(i + currentPermutationTokensStartOffset);
termLength += shingleToken.termLength();
shingle.add(shingleToken);
}
@@ -452,8 +452,7 @@
// shingle token factory
StringBuilder sb = new StringBuilder(termLength + 10); // paranormal ability to foresee the future.
- for (Iterator iterator = shingle.iterator(); iterator.hasNext();) {
- Token shingleToken = (Token) iterator.next();
+ for (Token shingleToken : shingle) {
if (spacerCharacter != null && sb.length() > 0) {
sb.append(spacerCharacter);
}
@@ -493,22 +492,19 @@
// get rid of resources
// delete the first column in the matrix
- Matrix.Column deletedColumn = (Matrix.Column) matrix.columns.remove(0);
+ Matrix.Column deletedColumn = matrix.columns.remove(0);
// remove all shingles seen that include any of the tokens from the deleted column.
- List deletedColumnTokens = new ArrayList();
- for (Iterator iterator = deletedColumn.getRows().iterator(); iterator.hasNext();) {
- Matrix.Column.Row row = (Matrix.Column.Row) iterator.next();
- for (Iterator rowIter = row.getTokens().iterator(); rowIter.hasNext();) {
- Object o = rowIter.next();//Token
- deletedColumnTokens.add(o);
+ List<Token> deletedColumnTokens = new ArrayList<Token>();
+ for (Matrix.Column.Row row : deletedColumn.getRows()) {
+ for (Token token : row.getTokens()) {
+ deletedColumnTokens.add(token);
}
}
- for (Iterator shinglesSeenIterator = shinglesSeen.iterator(); shinglesSeenIterator.hasNext();) {
- List shingle = (List) shinglesSeenIterator.next();
- for (Iterator deletedIter = deletedColumnTokens.iterator(); deletedIter.hasNext();) {
- Token deletedColumnToken = (Token) deletedIter.next();
+ for (Iterator<List<Token>> shinglesSeenIterator = shinglesSeen.iterator(); shinglesSeenIterator.hasNext();) {
+ List<Token> shingle = shinglesSeenIterator.next();
+ for (Token deletedColumnToken : deletedColumnTokens) {
if (shingle.contains(deletedColumnToken)) {
shinglesSeenIterator.remove();
break;
@@ -552,14 +548,12 @@
* finally resets the current (next) shingle size and offset.
*/
private void nextTokensPermutation() {
- Matrix.Column.Row[] rowsPermutation;
- rowsPermutation = (Matrix.Column.Row[]) permutations.next();
- List currentPermutationRows = new ArrayList();
- List currentPermuationTokens = new ArrayList();
- for (int i = 0; i < rowsPermutation.length; i++) {
- Matrix.Column.Row row = rowsPermutation[i];
- for (Iterator iterator = row.getTokens().iterator(); iterator.hasNext();) {
- currentPermuationTokens.add(iterator.next());
+ Matrix.Column.Row[] rowsPermutation = permutations.next();
+ List<Matrix.Column.Row> currentPermutationRows = new ArrayList<Matrix.Column.Row>();
+ List<Token> currentPermuationTokens = new ArrayList<Token>();
+ for (Matrix.Column.Row row : rowsPermutation) {
+ for (Token token : row.getTokens()) {
+ currentPermuationTokens.add(token);
currentPermutationRows.add(row);
}
}
@@ -627,8 +621,7 @@
double factor = 1d / Math.sqrt(total);
double weight = 0d;
- for (int i = 0; i < weights.length; i++) {
- double partWeight = weights[i];
+ for (double partWeight : weights) {
weight += partWeight * factor;
}
@@ -709,7 +702,7 @@
private boolean columnsHasBeenCreated = false;
- private List columns = new ArrayList();
+ private List<Column> columns = new ArrayList<Column>();
public List getColumns() {
return columns;
@@ -740,9 +733,9 @@
Matrix.this.columns.add(this);
}
- private List rows = new ArrayList();
+ private List<Row> rows = new ArrayList<Row>();
- public List getRows() {
+ public List<Row> getRows() {
return rows;
}
@@ -781,7 +774,7 @@
return Column.this;
}
- private List tokens = new LinkedList();
+ private List<Token> tokens = new LinkedList<Token>();
public Row() {
Column.this.rows.add(this);
@@ -791,11 +784,11 @@
return Column.this.rows.indexOf(this);
}
- public List getTokens() {
+ public List<Token> getTokens() {
return tokens;
}
- public void setTokens(List tokens) {
+ public void setTokens(List<Token> tokens) {
this.tokens = tokens;
}
@@ -826,9 +819,9 @@
}
- public Iterator permutationIterator() {
+ public Iterator<Column.Row[]> permutationIterator() {
- return new Iterator() {
+ return new Iterator<Column.Row[]>() {
private int[] columnRowCounters = new int[columns.size()];
@@ -838,10 +831,10 @@
public boolean hasNext() {
int s = columnRowCounters.length;
- return s != 0 && columnRowCounters[s - 1] < ((Column) columns.get(s - 1)).getRows().size();
+ return s != 0 && columnRowCounters[s - 1] < (columns.get(s - 1)).getRows().size();
}
- public Object next() {
+ public Column.Row[] next() {
if (!hasNext()) {
throw new NoSuchElementException("no more elements");
}
@@ -849,7 +842,7 @@
Column.Row[] rows = new Column.Row[columnRowCounters.length];
for (int i = 0; i < columnRowCounters.length; i++) {
- rows[i] = (Matrix.Column.Row) ((Column) columns.get(i)).rows.get(columnRowCounters[i]);
+ rows[i] = columns.get(i).rows.get(columnRowCounters[i]);
}
incrementColumnRowCounters();
@@ -859,7 +852,7 @@
private void incrementColumnRowCounters() {
for (int i = 0; i < columnRowCounters.length; i++) {
columnRowCounters[i]++;
- if (columnRowCounters[i] == ((Column) columns.get(i)).rows.size() &&
+ if (columnRowCounters[i] == columns.get(i).rows.size() &&
i < columnRowCounters.length - 1) {
columnRowCounters[i] = 0;
} else {