Details
-
Improvement
-
Status: Open
-
Major
-
Resolution: Unresolved
-
3.4
-
None
-
Windows
Description
Using RemoveDuplicatesTokenFilterFactory does not remove duplicated terms. The current implementation is:
@Override
53 public boolean incrementToken() throws IOException {
54 while (input.incrementToken()) {
55 final char term[] = termAttribute.buffer();
56 final int length = termAttribute.length();
57 final int posIncrement = posIncAttribute.getPositionIncrement();
58
59 if (posIncrement > 0) {
60 previous.clear();
61 }
62
63 boolean duplicate = (posIncrement == 0 && previous.contains(term, 0, length));
64
65 // clone the term, and add to the set of seen terms.
66 char saved[] = new char[length];
67 System.arraycopy(term, 0, saved, 0, length);
68 previous.add(saved);
69
70 if (!duplicate) {
71 return true;
72 }
73 }
74 return false;
75 }
It should be like the following:
@Override
public boolean incrementToken() throws IOException {
while (input.incrementToken()) {
final char term[] = termAttribute.buffer();
final int length = termAttribute.length();
final int posIncrement = posIncAttribute.getPositionIncrement();
if (posIncrement > 0)
{ previous.clear(); }
boolean duplicate = (posIncrement == 0 && previous.contains(term, 0, length));
if (duplicate)
{ return false; }
else
{
// clone the term, and add to the set of seen terms.
char saved[] = new char[length];
System.arraycopy(term, 0, saved, 0, length);
previous.add(saved);
}
}
return true;
}