Index: solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java =================================================================== --- solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java (revision 940761) +++ solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/HTMLStripTransformer.java (working copy) @@ -16,7 +16,7 @@ */ package org.apache.solr.handler.dataimport; -import org.apache.solr.analysis.HTMLStripCharFilter; +import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; import org.apache.lucene.analysis.CharReader; import java.io.IOException; Index: solr/src/test/test-files/htmlStripReaderTest.html (deleted) =================================================================== Index: solr/src/test/org/apache/solr/analysis/HTMLStripCharFilterTest.java (deleted) =================================================================== Index: solr/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java =================================================================== --- solr/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java (revision 940761) +++ solr/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java (working copy) @@ -311,7 +311,7 @@ NamedList indexPart = textType.get("index"); assertNotNull("expecting an index token analysis for field type 'charfilthtmlmap'", indexPart); - assertEquals(" whátëvêr ", indexPart.get("org.apache.solr.analysis.HTMLStripCharFilter")); + assertEquals(" whátëvêr ", indexPart.get("org.apache.lucene.analysis.charfilter.HTMLStripCharFilter")); assertEquals(" whatever ", indexPart.get("org.apache.lucene.analysis.charfilter.MappingCharFilter")); List tokenList = (List)indexPart.get("org.apache.lucene.analysis.WhitespaceTokenizer"); Index: solr/src/java/org/apache/solr/analysis/HTMLStripCharFilter.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/analysis/HTMLStripCharFilterFactory.java =================================================================== --- solr/src/java/org/apache/solr/analysis/HTMLStripCharFilterFactory.java (revision 940761) +++ solr/src/java/org/apache/solr/analysis/HTMLStripCharFilterFactory.java (working copy) @@ -19,6 +19,7 @@ */ import org.apache.lucene.analysis.CharStream; +import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; public class HTMLStripCharFilterFactory extends BaseCharFilterFactory { Index: lucene/contrib/CHANGES.txt =================================================================== --- lucene/contrib/CHANGES.txt (revision 940761) +++ lucene/contrib/CHANGES.txt (working copy) @@ -159,6 +159,8 @@ New features from Solr now available to Lucene users include: - o.a.l.analysis.commongrams: Constructs n-grams for frequently occurring terms and phrases. + - o.a.l.analysis.charfilter.HTMLStripCharFilter: CharFilter that strips HTML + constructs. (... in progress) Build Index: lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java =================================================================== --- lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java (revision 940761) +++ lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.solr.analysis; +package org.apache.lucene.analysis.charfilter; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -21,6 +21,8 @@ import java.io.File; import java.io.FileReader; import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; import java.util.HashSet; @@ -69,8 +71,8 @@ //Some sanity checks, but not a full-fledged check public void testHTML() throws Exception { - - HTMLStripCharFilter reader = new HTMLStripCharFilter(CharReader.get(new FileReader(new File("htmlStripReaderTest.html")))); + InputStream stream = getClass().getResourceAsStream("htmlStripReaderTest.html"); + HTMLStripCharFilter reader = new HTMLStripCharFilter(CharReader.get(new InputStreamReader(stream, "UTF-8"))); StringBuilder builder = new StringBuilder(); int ch = -1; while ((ch = reader.read()) != -1){ Index: lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java =================================================================== --- lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java (revision 940761) +++ lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.solr.analysis; +package org.apache.lucene.analysis.charfilter; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -30,8 +30,6 @@ /** * A CharFilter that wraps another Reader and attempts to strip out HTML constructs. - * - * @version $Id$ */ public class HTMLStripCharFilter extends BaseCharFilter { private int readAheadLimit = DEFAULT_READ_AHEAD;