Note that
+ * this does not do line-number counting, but instead keeps track of the
+ * character position of the token in the input, as required by Lucene's {@link
+ * org.apache.lucene.analysis.Token} API.
+ * */
+public final class FastCharStream implements CharStream {
+ char[] buffer = null; // allocated lazily by the first refill()
+
+ int bufferLength = 0; // end of valid chars
+ int bufferPosition = 0; // next char to read
+
+ int tokenStart = 0; // offset in buffer of the current token's first char
+ int bufferStart = 0; // position in the input of buffer[0]
+
+ Reader input; // source of chars
+
+ /** Constructs from a Reader. */
+ public FastCharStream(Reader r) {
+ input = r;
+ }
+
+ public final char readChar() throws IOException { // returns the next char, refilling when the buffer is exhausted
+ if (bufferPosition >= bufferLength)
+ refill();
+ return buffer[bufferPosition++];
+ }
+
+ private final void refill() throws IOException { // keeps the chars from tokenStart onwards, then reads more input
+ int newPosition = bufferLength - tokenStart; // number of buffered chars to keep
+
+ if (tokenStart == 0) { // token starts at buffer[0]: shifting would free nothing
+ if (buffer == null) { // first time: alloc buffer
+ buffer = new char[2048];
+ } else if (bufferLength == buffer.length) { // buffer is full: double its size
+ char[] newBuffer = new char[buffer.length*2];
+ System.arraycopy(buffer, 0, newBuffer, 0, bufferLength);
+ buffer = newBuffer;
+ }
+ } else { // shift token to front
+ System.arraycopy(buffer, tokenStart, buffer, 0, newPosition);
+ }
+
+ bufferLength = newPosition; // update state
+ bufferPosition = newPosition;
+ bufferStart += tokenStart;
+ tokenStart = 0;
+
+ int charsRead = // fill space in buffer
+ input.read(buffer, newPosition, buffer.length-newPosition);
+ if (charsRead == -1) // Reader reports end of input
+ throw new IOException("read past eof");
+ else
+ bufferLength += charsRead;
+ }
+
+ public final char BeginToken() throws IOException { // marks the token start at the current position, then reads its first char
+ tokenStart = bufferPosition;
+ return readChar();
+ }
+
+ public final void backup(int amount) { // moves the read position back by 'amount' chars (no bounds check)
+ bufferPosition -= amount;
+ }
+
+ public final String GetImage() { // text from tokenStart up to the read position
+ return new String(buffer, tokenStart, bufferPosition - tokenStart);
+ }
+
+ public final char[] GetSuffix(int len) { // copy of the 'len' chars just before the read position
+ char[] value = new char[len];
+ System.arraycopy(buffer, bufferPosition - len, value, 0, len);
+ return value;
+ }
+
+ public final void Done() { // closes the underlying Reader
+ try {
+ input.close();
+ } catch (IOException e) { // a failure of close() is silently ignored
+ }
+ }
+
+ public final int getColumn() { // char offset in the input of the read position, not a per-line column
+ return bufferStart + bufferPosition;
+ }
+ public final int getLine() { // lines are not counted: always 1
+ return 1;
+ }
+ public final int getEndColumn() { // same value as getColumn()
+ return bufferStart + bufferPosition;
+ }
+ public final int getEndLine() { // lines are not counted: always 1
+ return 1;
+ }
+ public final int getBeginColumn() { // char offset in the input of the token start
+ return bufferStart + tokenStart;
+ }
+ public final int getBeginLine() { // lines are not counted: always 1
+ return 1;
+ }
+}
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java (revision 1359191)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java (working copy)
Property changes on: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java
___________________________________________________________________
Added: cvs2svn:cvs-rev
## -0,0 +1 ##
+1.3
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java (revision 1359190)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java (working copy)
@@ -29,6 +29,10 @@
private MyPipedInputStream pipeInStream = null;
private PipedOutputStream pipeOutStream = null;
+ public HTMLParser(Reader reader) {
+ this(new FastCharStream(reader));
+ }
+
private class MyPipedInputStream extends PipedInputStream{
public MyPipedInputStream(){
@@ -227,7 +231,7 @@
Token t1, t2;
boolean inImg = false;
t1 = jj_consume_token(TagName);
- String tagName = t1.image.toLowerCase(Locale.ENGLISH);
+ String tagName = t1.image.toLowerCase(Locale.ROOT);
if(Tags.WS_ELEMS.contains(tagName) ) {
addSpace();
}
@@ -264,7 +268,7 @@
)
&& t2 != null)
{
- currentMetaTag=t2.image.toLowerCase(Locale.ENGLISH);
+ currentMetaTag=t2.image.toLowerCase(Locale.ROOT);
if(currentMetaTag != null && currentMetaContent != null) {
addMetaTag();
}
@@ -272,7 +276,7 @@
if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 !=
null)
{
- currentMetaContent=t2.image.toLowerCase(Locale.ENGLISH);
+ currentMetaContent=t2.image.toLowerCase(Locale.ROOT);
if(currentMetaTag != null && currentMetaContent != null) {
addMetaTag();
}
@@ -464,7 +468,6 @@
/** Generated Token Manager. */
public HTMLParserTokenManager token_source;
- SimpleCharStream jj_input_stream;
/** Current token. */
public Token token;
/** Next token. */
@@ -485,14 +488,9 @@
private boolean jj_rescan = false;
private int jj_gc = 0;
- /** Constructor with InputStream. */
- public HTMLParser(java.io.InputStream stream) {
- this(stream, null);
- }
- /** Constructor with InputStream and supplied encoding */
- public HTMLParser(java.io.InputStream stream, String encoding) {
- try { jj_input_stream = new SimpleCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
- token_source = new HTMLParserTokenManager(jj_input_stream);
+ /** Constructor with user supplied CharStream. */
+ public HTMLParser(CharStream stream) {
+ token_source = new HTMLParserTokenManager(stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
@@ -501,13 +499,8 @@
}
/** Reinitialise. */
- public void ReInit(java.io.InputStream stream) {
- ReInit(stream, null);
- }
- /** Reinitialise. */
- public void ReInit(java.io.InputStream stream, String encoding) {
- try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
- token_source.ReInit(jj_input_stream);
+ public void ReInit(CharStream stream) {
+ token_source.ReInit(stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
@@ -515,28 +508,6 @@
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
- /** Constructor. */
- public HTMLParser(java.io.Reader stream) {
- jj_input_stream = new SimpleCharStream(stream, 1, 1);
- token_source = new HTMLParserTokenManager(jj_input_stream);
- token = new Token();
- jj_ntk = -1;
- jj_gen = 0;
- for (int i = 0; i < 14; i++) jj_la1[i] = -1;
- for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
- }
-
- /** Reinitialise. */
- public void ReInit(java.io.Reader stream) {
- jj_input_stream.ReInit(stream, 1, 1);
- token_source.ReInit(jj_input_stream);
- token = new Token();
- jj_ntk = -1;
- jj_gen = 0;
- for (int i = 0; i < 14; i++) jj_la1[i] = -1;
- for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
- }
-
/** Constructor with generated Token Manager. */
public HTMLParser(HTMLParserTokenManager tm) {
token_source = tm;
@@ -631,7 +602,7 @@
return (jj_ntk = jj_nt.kind);
}
- private java.util.List
@@ -182,7 +183,7 @@
iwc.setInfoStream(System.err);
} else {
File f = new File(infoStreamVal).getAbsoluteFile();
- iwc.setInfoStream(new PrintStream(new BufferedOutputStream(new FileOutputStream(f))));
+ iwc.setInfoStream(new PrintStream(new BufferedOutputStream(new FileOutputStream(f)), false, Charset.defaultCharset().name()));
}
}
IndexWriter writer = new IndexWriter(runData.getDirectory(), iwc);
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java (revision 1359190)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java (working copy)
@@ -17,6 +17,8 @@
* limitations under the License.
*/
+import java.util.Locale;
+
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.stats.Points;
import org.apache.lucene.benchmark.byTask.stats.TaskStats;
@@ -266,7 +268,7 @@
public void tearDown() throws Exception {
if (++logStepCount % logStep == 0) {
double time = (System.currentTimeMillis() - runData.getStartTimeMillis()) / 1000.0;
- System.out.println(String.format("%7.2f",time) + " sec --> "
+ System.out.println(String.format(Locale.ROOT, "%7.2f",time) + " sec --> "
+ Thread.currentThread().getName() + " " + getLogMessage(logStepCount));
}
}
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java (revision 1359190)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java (working copy)
@@ -77,7 +77,7 @@
} else {
throw new RuntimeException("You must specify the sort type ie page:int,subject:string");
}
- sortField0 = new SortField(fieldName, SortField.Type.valueOf(typeString.toUpperCase(Locale.ENGLISH)));
+ sortField0 = new SortField(fieldName, SortField.Type.valueOf(typeString.toUpperCase(Locale.ROOT)));
}
sortFields[upto++] = sortField0;
}
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java (revision 1359190)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java (working copy)
@@ -19,6 +19,7 @@
import java.util.ArrayList;
import java.util.List;
+import java.util.Locale;
import java.text.NumberFormat;
import org.apache.lucene.benchmark.byTask.PerfRunData;
@@ -428,7 +429,7 @@
sb.append(padd);
sb.append(!letChildReport ? ">" : (parallel ? "]" : "}"));
if (fixedTime) {
- sb.append(" " + NumberFormat.getNumberInstance().format(runTimeSec) + "s");
+ sb.append(" " + NumberFormat.getNumberInstance(Locale.ROOT).format(runTimeSec) + "s");
} else if (repetitions>1) {
sb.append(" * " + repetitions);
} else if (repetitions==REPEAT_EXHAUST) {
@@ -487,7 +488,7 @@
if (rate>0) {
seqName += "_" + rate + (perMin?"/min":"/sec");
}
- if (parallel && seqName.toLowerCase().indexOf("par")<0) {
+ if (parallel && seqName.toLowerCase(Locale.ROOT).indexOf("par")<0) {
seqName += "_Par";
}
}
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java (revision 1359190)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java (working copy)
@@ -22,6 +22,7 @@
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Locale;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
@@ -159,7 +160,7 @@
} else {
stok.nextToken();
if (stok.ttype!=StreamTokenizer.TT_WORD) throw new Exception("expected rate unit: 'min' or 'sec' - "+stok.toString());
- String unit = stok.sval.toLowerCase();
+ String unit = stok.sval.toLowerCase(Locale.ROOT);
if ("min".equals(unit)) {
((TaskSequence)prevTask).setRate((int)stok.nval,true); // set rate per min
} else if ("sec".equals(unit)) {
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java (revision 1359190)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java (working copy)
@@ -18,6 +18,7 @@
*/
import java.text.NumberFormat;
+import java.util.Locale;
/**
* Formatting utilities (for reports).
@@ -25,9 +26,9 @@
public class Format {
private static NumberFormat numFormat [] = {
- NumberFormat.getInstance(),
- NumberFormat.getInstance(),
- NumberFormat.getInstance(),
+ NumberFormat.getInstance(Locale.ROOT),
+ NumberFormat.getInstance(Locale.ROOT),
+ NumberFormat.getInstance(Locale.ROOT),
};
private static final String padd = " ";
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StreamUtils.java
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StreamUtils.java (revision 1359190)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StreamUtils.java (working copy)
@@ -99,7 +99,7 @@
String fileName = file.getName();
int idx = fileName.lastIndexOf('.');
if (idx != -1) {
- type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ENGLISH));
+ type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ROOT));
}
return type==null ? Type.PLAIN : type;
}
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java (revision 1359190)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java (working copy)
@@ -19,6 +19,7 @@
import java.io.PrintWriter;
import java.text.NumberFormat;
import java.util.ArrayList;
+import java.util.Locale;
/**
* Results of quality benchmark run for a single query or for a set of queries.
@@ -141,7 +142,7 @@
logger.println(title);
}
prefix = prefix==null ? "" : prefix;
- NumberFormat nf = NumberFormat.getInstance();
+ NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
nf.setMaximumFractionDigits(3);
nf.setMinimumFractionDigits(3);
nf.setGroupingUsed(true);
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java (revision 1359190)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java (working copy)
@@ -24,11 +24,13 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.IOUtils;
import java.io.BufferedReader;
import java.io.File;
-import java.io.FileReader;
+import java.io.OutputStreamWriter;
import java.io.PrintWriter;
+import java.nio.charset.Charset;
import java.util.HashSet;
import java.util.Set;
@@ -51,7 +53,7 @@
File topicsFile = new File(args[0]);
File qrelsFile = new File(args[1]);
- SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2]), "lucene");
+ SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2], "UTF-8"), "lucene");
FSDirectory dir = FSDirectory.open(new File(args[3]));
String fieldSpec = args.length == 5 ? args[4] : "T"; // default to Title-only if not specified.
IndexReader reader = DirectoryReader.open(dir);
@@ -60,14 +62,14 @@
int maxResults = 1000;
String docNameField = "docname";
- PrintWriter logger = new PrintWriter(System.out, true);
+ PrintWriter logger = new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()), true);
// use trec utilities to read trec topics into quality queries
TrecTopicsReader qReader = new TrecTopicsReader();
- QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile)));
+ QualityQuery qqs[] = qReader.readQueries(new BufferedReader(IOUtils.getDecodingReader(topicsFile, IOUtils.CHARSET_UTF_8)));
// prepare judge, with trec utilities that read from a QRels file
- Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile)));
+ Judge judge = new TrecJudge(new BufferedReader(IOUtils.getDecodingReader(qrelsFile, IOUtils.CHARSET_UTF_8)));
// validate topics & judgments match each other
judge.validateData(qqs, logger);
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java (revision 1359190)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java (working copy)
@@ -19,6 +19,7 @@
import java.io.IOException;
import java.io.PrintWriter;
import java.text.NumberFormat;
+import java.util.Locale;
import org.apache.lucene.benchmark.quality.QualityQuery;
import org.apache.lucene.search.ScoreDoc;
@@ -45,7 +46,7 @@
public SubmissionReport (PrintWriter logger, String name) {
this.logger = logger;
this.name = name;
- nf = NumberFormat.getInstance();
+ nf = NumberFormat.getInstance(Locale.ROOT);
nf.setMaximumFractionDigits(4);
nf.setMinimumFractionDigits(4);
}
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java (revision 1359190)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java (working copy)
@@ -19,13 +19,19 @@
import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.apache.lucene.util.IOUtils;
+
/**
* Split the Reuters SGML documents into Simple Text files containing: Title, Date, Dateline, Body
*/
@@ -73,7 +79,7 @@
*/
protected void extractFile(File sgmFile) {
try {
- BufferedReader reader = new BufferedReader(new FileReader(sgmFile));
+ BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(sgmFile), IOUtils.CHARSET_UTF_8));
StringBuilder buffer = new StringBuilder(1024);
StringBuilder outBuffer = new StringBuilder(1024);
@@ -107,7 +113,7 @@
File outFile = new File(outputDir, sgmFile.getName() + "-"
+ (docNumber++) + ".txt");
// System.out.println("Writing " + outFile);
- FileWriter writer = new FileWriter(outFile);
+ OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(outFile), IOUtils.CHARSET_UTF_8);
writer.write(out);
writer.close();
outBuffer.setLength(0);
Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java
===================================================================
--- lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java (revision 1359190)
+++ lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java (working copy)
@@ -18,8 +18,10 @@
*/
import java.io.File;
-import java.io.FileWriter;
+import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
import java.util.Properties;
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
@@ -28,6 +30,7 @@
import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.document.Document;
+import org.apache.lucene.util.IOUtils;
/**
* Extract the downloaded Wikipedia dump into separate files for indexing.
@@ -83,7 +86,7 @@
contents.append("\n");
try {
- FileWriter writer = new FileWriter(f);
+ Writer writer = new OutputStreamWriter(new FileOutputStream(f), IOUtils.CHARSET_UTF_8);
writer.write(contents.toString());
writer.close();
} catch (IOException ioe) {
Index: lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java
===================================================================
--- lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java (revision 1359190)
+++ lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java (working copy)
@@ -166,7 +166,7 @@
// DocMaker did not close its ContentSource if resetInputs was called twice,
// leading to a file handle leak.
File f = new File(getWorkDir(), "docMakerLeak.txt");
- PrintStream ps = new PrintStream(f);
+ PrintStream ps = new PrintStream(f, "UTF-8");
ps.println("one title\t" + System.currentTimeMillis() + "\tsome content");
ps.close();
Index: lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTaskTest.java
===================================================================
--- lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTaskTest.java (revision 1359190)
+++ lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTaskTest.java (working copy)
@@ -20,6 +20,7 @@
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.PrintStream;
+import java.nio.charset.Charset;
import java.util.Properties;
import org.apache.lucene.benchmark.BenchmarkTestCase;
@@ -50,7 +51,7 @@
PrintStream curOut = System.out;
ByteArrayOutputStream baos = new ByteArrayOutputStream();
- System.setOut(new PrintStream(baos));
+ System.setOut(new PrintStream(baos, false, Charset.defaultCharset().name()));
try {
PerfRunData runData = createPerfRunData("SystemOut");
CreateIndexTask cit = new CreateIndexTask(runData);
@@ -63,7 +64,7 @@
PrintStream curErr = System.err;
baos.reset();
- System.setErr(new PrintStream(baos));
+ System.setErr(new PrintStream(baos, false, Charset.defaultCharset().name()));
try {
PerfRunData runData = createPerfRunData("SystemErr");
CreateIndexTask cit = new CreateIndexTask(runData);
Index: lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java
===================================================================
--- lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java (revision 1359190)
+++ lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java (working copy)
@@ -31,6 +31,7 @@
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.lucene.benchmark.BenchmarkTestCase;
import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util._TestUtil;
import org.junit.After;
import org.junit.Before;
@@ -88,7 +89,7 @@
private File rawTextFile(String ext) throws Exception {
File f = new File(testDir,"testfile." + ext);
- BufferedWriter w = new BufferedWriter(new FileWriter(f));
+ BufferedWriter w = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), IOUtils.CHARSET_UTF_8));
w.write(TEXT);
w.newLine();
w.close();
@@ -117,7 +118,7 @@
}
private void writeText(OutputStream os) throws IOException {
- BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
+ BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os, IOUtils.CHARSET_UTF_8));
w.write(TEXT);
w.newLine();
w.close();
@@ -125,7 +126,7 @@
private void assertReadText(File f) throws Exception {
InputStream ir = StreamUtils.inputStream(f);
- InputStreamReader in = new InputStreamReader(ir);
+ InputStreamReader in = new InputStreamReader(ir, IOUtils.CHARSET_UTF_8);
BufferedReader r = new BufferedReader(in);
String line = r.readLine();
assertEquals("Wrong text found in "+f.getName(), TEXT, line);
Index: lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
===================================================================
--- lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java (revision 1359190)
+++ lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java (working copy)
@@ -31,7 +31,9 @@
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
import java.io.PrintWriter;
+import java.nio.charset.Charset;
/**
* Test that quality run does its job.
@@ -55,7 +57,7 @@
int maxResults = 1000;
String docNameField = "doctitle"; // orig docID is in the linedoc format title
- PrintWriter logger = VERBOSE ? new PrintWriter(System.out,true) : null;
+ PrintWriter logger = VERBOSE ? new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()),true) : null;
// prepare topics
InputStream topics = getClass().getResourceAsStream("trecTopics.txt");
Index: lucene/build.xml
===================================================================
--- lucene/build.xml (revision 1359190)
+++ lucene/build.xml (working copy)
@@ -169,11 +169,19 @@
Note that
+ * this does not do line-number counting, but instead keeps track of the
+ * character position of the token in the input, as required by Lucene's {@link
+ * org.apache.lucene.analysis.Token} API.
+ * */
+public final class FastCharStream implements CharStream {
+ char[] buffer = null; // allocated lazily by the first refill()
+
+ int bufferLength = 0; // end of valid chars
+ int bufferPosition = 0; // next char to read
+
+ int tokenStart = 0; // offset in buffer of the current token's first char
+ int bufferStart = 0; // position in the input of buffer[0]
+
+ Reader input; // source of chars
+
+ /** Constructs from a Reader. */
+ public FastCharStream(Reader r) {
+ input = r;
+ }
+
+ public final char readChar() throws IOException { // returns the next char, refilling when the buffer is exhausted
+ if (bufferPosition >= bufferLength)
+ refill();
+ return buffer[bufferPosition++];
+ }
+
+ private final void refill() throws IOException { // keeps the chars from tokenStart onwards, then reads more input
+ int newPosition = bufferLength - tokenStart; // number of buffered chars to keep
+
+ if (tokenStart == 0) { // token starts at buffer[0]: shifting would free nothing
+ if (buffer == null) { // first time: alloc buffer
+ buffer = new char[2048];
+ } else if (bufferLength == buffer.length) { // buffer is full: double its size
+ char[] newBuffer = new char[buffer.length*2];
+ System.arraycopy(buffer, 0, newBuffer, 0, bufferLength);
+ buffer = newBuffer;
+ }
+ } else { // shift token to front
+ System.arraycopy(buffer, tokenStart, buffer, 0, newPosition);
+ }
+
+ bufferLength = newPosition; // update state
+ bufferPosition = newPosition;
+ bufferStart += tokenStart;
+ tokenStart = 0;
+
+ int charsRead = // fill space in buffer
+ input.read(buffer, newPosition, buffer.length-newPosition);
+ if (charsRead == -1) // Reader reports end of input
+ throw new IOException("read past eof");
+ else
+ bufferLength += charsRead;
+ }
+
+ public final char BeginToken() throws IOException { // marks the token start at the current position, then reads its first char
+ tokenStart = bufferPosition;
+ return readChar();
+ }
+
+ public final void backup(int amount) { // moves the read position back by 'amount' chars (no bounds check)
+ bufferPosition -= amount;
+ }
+
+ public final String GetImage() { // text from tokenStart up to the read position
+ return new String(buffer, tokenStart, bufferPosition - tokenStart);
+ }
+
+ public final char[] GetSuffix(int len) { // copy of the 'len' chars just before the read position
+ char[] value = new char[len];
+ System.arraycopy(buffer, bufferPosition - len, value, 0, len);
+ return value;
+ }
+
+ public final void Done() { // closes the underlying Reader
+ try {
+ input.close();
+ } catch (IOException e) { // a failure of close() is silently ignored
+ }
+ }
+
+ public final int getColumn() { // char offset in the input of the read position, not a per-line column
+ return bufferStart + bufferPosition;
+ }
+ public final int getLine() { // lines are not counted: always 1
+ return 1;
+ }
+ public final int getEndColumn() { // same value as getColumn()
+ return bufferStart + bufferPosition;
+ }
+ public final int getEndLine() { // lines are not counted: always 1
+ return 1;
+ }
+ public final int getBeginColumn() { // char offset in the input of the token start
+ return bufferStart + tokenStart;
+ }
+ public final int getBeginLine() { // lines are not counted: always 1
+ return 1;
+ }
+}
Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java
===================================================================
--- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java (revision 1359191)
+++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java (working copy)
Property changes on: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java
___________________________________________________________________
Added: cvs2svn:cvs-rev
## -0,0 +1 ##
+1.3
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/JavaCharStream.java
===================================================================
--- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/JavaCharStream.java (revision 1359190)
+++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/JavaCharStream.java (working copy)
@@ -1,616 +0,0 @@
-/* Generated By:JavaCC: Do not edit this line. JavaCharStream.java Version 4.1 */
-/* JavaCCOptions:STATIC=false */
-package org.apache.lucene.queryparser.flexible.standard.parser;
-
-/**
- * An implementation of interface CharStream, where the stream is assumed to
- * contain only ASCII characters (with java-like unicode escape processing).
- */
-
-public class JavaCharStream
-{
-/** Whether parser is static. */
- public static final boolean staticFlag = false;
- static final int hexval(char c) throws java.io.IOException {
- switch(c)
- {
- case '0' :
- return 0;
- case '1' :
- return 1;
- case '2' :
- return 2;
- case '3' :
- return 3;
- case '4' :
- return 4;
- case '5' :
- return 5;
- case '6' :
- return 6;
- case '7' :
- return 7;
- case '8' :
- return 8;
- case '9' :
- return 9;
-
- case 'a' :
- case 'A' :
- return 10;
- case 'b' :
- case 'B' :
- return 11;
- case 'c' :
- case 'C' :
- return 12;
- case 'd' :
- case 'D' :
- return 13;
- case 'e' :
- case 'E' :
- return 14;
- case 'f' :
- case 'F' :
- return 15;
- }
-
- throw new java.io.IOException(); // Should never come here
- }
-
-/** Position in buffer. */
- public int bufpos = -1;
- int bufsize;
- int available;
- int tokenBegin;
- protected int bufline[];
- protected int bufcolumn[];
-
- protected int column = 0;
- protected int line = 1;
-
- protected boolean prevCharIsCR = false;
- protected boolean prevCharIsLF = false;
-
- protected java.io.Reader inputStream;
-
- protected char[] nextCharBuf;
- protected char[] buffer;
- protected int maxNextCharInd = 0;
- protected int nextCharInd = -1;
- protected int inBuf = 0;
- protected int tabSize = 8;
-
- protected void setTabSize(int i) { tabSize = i; }
- protected int getTabSize(int i) { return tabSize; }
-
- protected void ExpandBuff(boolean wrapAround)
- {
- char[] newbuffer = new char[bufsize + 2048];
- int newbufline[] = new int[bufsize + 2048];
- int newbufcolumn[] = new int[bufsize + 2048];
-
- try
- {
- if (wrapAround)
- {
- System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
- System.arraycopy(buffer, 0, newbuffer,
- bufsize - tokenBegin, bufpos);
- buffer = newbuffer;
-
- System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
- System.arraycopy(bufline, 0, newbufline, bufsize - tokenBegin, bufpos);
- bufline = newbufline;
-
- System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
- System.arraycopy(bufcolumn, 0, newbufcolumn, bufsize - tokenBegin, bufpos);
- bufcolumn = newbufcolumn;
-
- bufpos += (bufsize - tokenBegin);
- }
- else
- {
- System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
- buffer = newbuffer;
-
- System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
- bufline = newbufline;
-
- System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
- bufcolumn = newbufcolumn;
-
- bufpos -= tokenBegin;
- }
- }
- catch (Throwable t)
- {
- throw new Error(t.getMessage());
- }
-
- available = (bufsize += 2048);
- tokenBegin = 0;
- }
-
- protected void FillBuff() throws java.io.IOException
- {
- int i;
- if (maxNextCharInd == 4096)
- maxNextCharInd = nextCharInd = 0;
-
- try {
- if ((i = inputStream.read(nextCharBuf, maxNextCharInd,
- 4096 - maxNextCharInd)) == -1)
- {
- inputStream.close();
- throw new java.io.IOException();
- }
- else
- maxNextCharInd += i;
- return;
- }
- catch(java.io.IOException e) {
- if (bufpos != 0)
- {
- --bufpos;
- backup(0);
- }
- else
- {
- bufline[bufpos] = line;
- bufcolumn[bufpos] = column;
- }
- throw e;
- }
- }
-
- protected char ReadByte() throws java.io.IOException
- {
- if (++nextCharInd >= maxNextCharInd)
- FillBuff();
-
- return nextCharBuf[nextCharInd];
- }
-
-/** @return starting character for token. */
- public char BeginToken() throws java.io.IOException
- {
- if (inBuf > 0)
- {
- --inBuf;
-
- if (++bufpos == bufsize)
- bufpos = 0;
-
- tokenBegin = bufpos;
- return buffer[bufpos];
- }
-
- tokenBegin = 0;
- bufpos = -1;
-
- return readChar();
- }
-
- protected void AdjustBuffSize()
- {
- if (available == bufsize)
- {
- if (tokenBegin > 2048)
- {
- bufpos = 0;
- available = tokenBegin;
- }
- else
- ExpandBuff(false);
- }
- else if (available > tokenBegin)
- available = bufsize;
- else if ((tokenBegin - available) < 2048)
- ExpandBuff(true);
- else
- available = tokenBegin;
- }
-
- protected void UpdateLineColumn(char c)
- {
- column++;
-
- if (prevCharIsLF)
- {
- prevCharIsLF = false;
- line += (column = 1);
- }
- else if (prevCharIsCR)
- {
- prevCharIsCR = false;
- if (c == '\n')
- {
- prevCharIsLF = true;
- }
- else
- line += (column = 1);
- }
-
- switch (c)
- {
- case '\r' :
- prevCharIsCR = true;
- break;
- case '\n' :
- prevCharIsLF = true;
- break;
- case '\t' :
- column--;
- column += (tabSize - (column % tabSize));
- break;
- default :
- break;
- }
-
- bufline[bufpos] = line;
- bufcolumn[bufpos] = column;
- }
-
-/** Read a character. */
- public char readChar() throws java.io.IOException
- {
- if (inBuf > 0)
- {
- --inBuf;
-
- if (++bufpos == bufsize)
- bufpos = 0;
-
- return buffer[bufpos];
- }
-
- char c;
-
- if (++bufpos == available)
- AdjustBuffSize();
-
- if ((buffer[bufpos] = c = ReadByte()) == '\\')
- {
- UpdateLineColumn(c);
-
- int backSlashCnt = 1;
-
- for (;;) // Read all the backslashes
- {
- if (++bufpos == available)
- AdjustBuffSize();
-
- try
- {
- if ((buffer[bufpos] = c = ReadByte()) != '\\')
- {
- UpdateLineColumn(c);
- // found a non-backslash char.
- if ((c == 'u') && ((backSlashCnt & 1) == 1))
- {
- if (--bufpos < 0)
- bufpos = bufsize - 1;
-
- break;
- }
-
- backup(backSlashCnt);
- return '\\';
- }
- }
- catch(java.io.IOException e)
- {
- if (backSlashCnt > 1)
- backup(backSlashCnt-1);
-
- return '\\';
- }
-
- UpdateLineColumn(c);
- backSlashCnt++;
- }
-
- // Here, we have seen an odd number of backslash's followed by a 'u'
- try
- {
- while ((c = ReadByte()) == 'u')
- ++column;
-
- buffer[bufpos] = c = (char)(hexval(c) << 12 |
- hexval(ReadByte()) << 8 |
- hexval(ReadByte()) << 4 |
- hexval(ReadByte()));
-
- column += 4;
- }
- catch(java.io.IOException e)
- {
- throw new Error("Invalid escape character at line " + line +
- " column " + column + ".");
- }
-
- if (backSlashCnt == 1)
- return c;
- else
- {
- backup(backSlashCnt - 1);
- return '\\';
- }
- }
- else
- {
- UpdateLineColumn(c);
- return c;
- }
- }
-
- @Deprecated
- /**
- * @deprecated
- * @see #getEndColumn
- */
- public int getColumn() {
- return bufcolumn[bufpos];
- }
-
- @Deprecated
- /**
- * @deprecated
- * @see #getEndLine
- */
- public int getLine() {
- return bufline[bufpos];
- }
-
-/** Get end column. */
- public int getEndColumn() {
- return bufcolumn[bufpos];
- }
-
-/** Get end line. */
- public int getEndLine() {
- return bufline[bufpos];
- }
-
-/** @return column of token start */
- public int getBeginColumn() {
- return bufcolumn[tokenBegin];
- }
-
-/** @return line number of token start */
- public int getBeginLine() {
- return bufline[tokenBegin];
- }
-
-/** Retreat. */
- public void backup(int amount) {
-
- inBuf += amount;
- if ((bufpos -= amount) < 0)
- bufpos += bufsize;
- }
-
-/** Constructor. */
- public JavaCharStream(java.io.Reader dstream,
- int startline, int startcolumn, int buffersize)
- {
- inputStream = dstream;
- line = startline;
- column = startcolumn - 1;
-
- available = bufsize = buffersize;
- buffer = new char[buffersize];
- bufline = new int[buffersize];
- bufcolumn = new int[buffersize];
- nextCharBuf = new char[4096];
- }
-
-/** Constructor. */
- public JavaCharStream(java.io.Reader dstream,
- int startline, int startcolumn)
- {
- this(dstream, startline, startcolumn, 4096);
- }
-
-/** Constructor. */
- public JavaCharStream(java.io.Reader dstream)
- {
- this(dstream, 1, 1, 4096);
- }
-/** Reinitialise. */
- public void ReInit(java.io.Reader dstream,
- int startline, int startcolumn, int buffersize)
- {
- inputStream = dstream;
- line = startline;
- column = startcolumn - 1;
-
- if (buffer == null || buffersize != buffer.length)
- {
- available = bufsize = buffersize;
- buffer = new char[buffersize];
- bufline = new int[buffersize];
- bufcolumn = new int[buffersize];
- nextCharBuf = new char[4096];
- }
- prevCharIsLF = prevCharIsCR = false;
- tokenBegin = inBuf = maxNextCharInd = 0;
- nextCharInd = bufpos = -1;
- }
-
-/** Reinitialise. */
- public void ReInit(java.io.Reader dstream,
- int startline, int startcolumn)
- {
- ReInit(dstream, startline, startcolumn, 4096);
- }
-
-/** Reinitialise. */
- public void ReInit(java.io.Reader dstream)
- {
- ReInit(dstream, 1, 1, 4096);
- }
-/** Constructor. */
- public JavaCharStream(java.io.InputStream dstream, String encoding, int startline,
- int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
- {
- this(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
- }
-
-/** Constructor. */
- public JavaCharStream(java.io.InputStream dstream, int startline,
- int startcolumn, int buffersize)
- {
- this(new java.io.InputStreamReader(dstream), startline, startcolumn, 4096);
- }
-
-/** Constructor. */
- public JavaCharStream(java.io.InputStream dstream, String encoding, int startline,
- int startcolumn) throws java.io.UnsupportedEncodingException
- {
- this(dstream, encoding, startline, startcolumn, 4096);
- }
-
-/** Constructor. */
- public JavaCharStream(java.io.InputStream dstream, int startline,
- int startcolumn)
- {
- this(dstream, startline, startcolumn, 4096);
- }
-
-/** Constructor. */
- public JavaCharStream(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
- {
- this(dstream, encoding, 1, 1, 4096);
- }
-
-/** Constructor. */
- public JavaCharStream(java.io.InputStream dstream)
- {
- this(dstream, 1, 1, 4096);
- }
-
-/** Reinitialise. */
- public void ReInit(java.io.InputStream dstream, String encoding, int startline,
- int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
- {
- ReInit(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
- }
-
-/** Reinitialise. */
- public void ReInit(java.io.InputStream dstream, int startline,
- int startcolumn, int buffersize)
- {
- ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
- }
-/** Reinitialise. */
- public void ReInit(java.io.InputStream dstream, String encoding, int startline,
- int startcolumn) throws java.io.UnsupportedEncodingException
- {
- ReInit(dstream, encoding, startline, startcolumn, 4096);
- }
-/** Reinitialise. */
- public void ReInit(java.io.InputStream dstream, int startline,
- int startcolumn)
- {
- ReInit(dstream, startline, startcolumn, 4096);
- }
-/** Reinitialise. */
- public void ReInit(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
- {
- ReInit(dstream, encoding, 1, 1, 4096);
- }
-
-/** Reinitialise. */
- public void ReInit(java.io.InputStream dstream)
- {
- ReInit(dstream, 1, 1, 4096);
- }
-
- /** @return token image as String */
- public String GetImage()
- {
- if (bufpos >= tokenBegin)
- return new String(buffer, tokenBegin, bufpos - tokenBegin + 1);
- else
- return new String(buffer, tokenBegin, bufsize - tokenBegin) +
- new String(buffer, 0, bufpos + 1);
- }
-
- /** @return suffix */
- public char[] GetSuffix(int len)
- {
- char[] ret = new char[len];
-
- if ((bufpos + 1) >= len)
- System.arraycopy(buffer, bufpos - len + 1, ret, 0, len);
- else
- {
- System.arraycopy(buffer, bufsize - (len - bufpos - 1), ret, 0,
- len - bufpos - 1);
- System.arraycopy(buffer, 0, ret, len - bufpos - 1, bufpos + 1);
- }
-
- return ret;
- }
-
- /** Set buffers back to null when finished. */
- public void Done()
- {
- nextCharBuf = null;
- buffer = null;
- bufline = null;
- bufcolumn = null;
- }
-
- /**
- * Method to adjust line and column numbers for the start of a token.
- */
- public void adjustBeginLineColumn(int newLine, int newCol)
- {
- int start = tokenBegin;
- int len;
-
- if (bufpos >= tokenBegin)
- {
- len = bufpos - tokenBegin + inBuf + 1;
- }
- else
- {
- len = bufsize - tokenBegin + bufpos + 1 + inBuf;
- }
-
- int i = 0, j = 0, k = 0;
- int nextColDiff = 0, columnDiff = 0;
-
- while (i < len &&
- bufline[j = start % bufsize] == bufline[k = ++start % bufsize])
- {
- bufline[j] = newLine;
- nextColDiff = columnDiff + bufcolumn[k] - bufcolumn[j];
- bufcolumn[j] = newCol + columnDiff;
- columnDiff = nextColDiff;
- i++;
- }
-
- if (i < len)
- {
- bufline[j] = newLine++;
- bufcolumn[j] = newCol + columnDiff;
-
- while (i++ < len)
- {
- if (bufline[j = start % bufsize] != bufline[++start % bufsize])
- bufline[j] = newLine++;
- else
- bufline[j] = newLine;
- }
- }
-
- line = bufline[j];
- column = bufcolumn[j];
- }
-
-}
-/* JavaCC - OriginalChecksum=7eecaeeaea1254b3e35fe8890a0127ce (do not edit this line) */
\ No newline at end of file
Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/ParseException.java
===================================================================
--- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/ParseException.java (revision 1359190)
+++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/ParseException.java (working copy)
@@ -193,4 +193,4 @@
}
}
-/* JavaCC - OriginalChecksum=0f25f4245374bbf9920c9a82efecadd2 (do not edit this line) */
+/* JavaCC - OriginalChecksum=7601d49d11bc059457ae5850628ebc8a (do not edit this line) */
Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java
===================================================================
--- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java (revision 1359190)
+++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java (working copy)
@@ -1,7 +1,7 @@
/* Generated By:JavaCC: Do not edit this line. StandardSyntaxParser.java */
package org.apache.lucene.queryparser.flexible.standard.parser;
-/*
+/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -49,14 +49,14 @@
// syntax parser constructor
public StandardSyntaxParser() {
- this(new StringReader(""));
+ this(new FastCharStream(new StringReader("")));
}
/** Parses a query string, returning a {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode}.
* @param query the query string to be parsed.
* @throws ParseException if the parsing fails
*/
public QueryNode parse(CharSequence query, CharSequence field) throws QueryNodeParseException {
- ReInit(new StringReader(query.toString()));
+ ReInit(new FastCharStream(new StringReader(query.toString())));
try {
// TopLevelQuery is a Query followed by the end-of-input (EOF)
QueryNode querynode = TopLevelQuery(field);
@@ -844,7 +844,6 @@
/** Generated Token Manager. */
public StandardSyntaxParserTokenManager token_source;
- JavaCharStream jj_input_stream;
/** Current token. */
public Token token;
/** Next token. */
@@ -870,14 +869,9 @@
private boolean jj_rescan = false;
private int jj_gc = 0;
- /** Constructor with InputStream. */
- public StandardSyntaxParser(java.io.InputStream stream) {
- this(stream, null);
- }
- /** Constructor with InputStream and supplied encoding */
- public StandardSyntaxParser(java.io.InputStream stream, String encoding) {
- try { jj_input_stream = new JavaCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
- token_source = new StandardSyntaxParserTokenManager(jj_input_stream);
+ /** Constructor with user supplied CharStream. */
+ public StandardSyntaxParser(CharStream stream) {
+ token_source = new StandardSyntaxParserTokenManager(stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
@@ -886,13 +880,8 @@
}
/** Reinitialise. */
- public void ReInit(java.io.InputStream stream) {
- ReInit(stream, null);
- }
- /** Reinitialise. */
- public void ReInit(java.io.InputStream stream, String encoding) {
- try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
- token_source.ReInit(jj_input_stream);
+ public void ReInit(CharStream stream) {
+ token_source.ReInit(stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
@@ -900,28 +889,6 @@
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
- /** Constructor. */
- public StandardSyntaxParser(java.io.Reader stream) {
- jj_input_stream = new JavaCharStream(stream, 1, 1);
- token_source = new StandardSyntaxParserTokenManager(jj_input_stream);
- token = new Token();
- jj_ntk = -1;
- jj_gen = 0;
- for (int i = 0; i < 28; i++) jj_la1[i] = -1;
- for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
- }
-
- /** Reinitialise. */
- public void ReInit(java.io.Reader stream) {
- jj_input_stream.ReInit(stream, 1, 1);
- token_source.ReInit(jj_input_stream);
- token = new Token();
- jj_ntk = -1;
- jj_gen = 0;
- for (int i = 0; i < 28; i++) jj_la1[i] = -1;
- for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
- }
-
/** Constructor with generated Token Manager. */
public StandardSyntaxParser(StandardSyntaxParserTokenManager tm) {
token_source = tm;
Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj
===================================================================
--- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj (revision 1359190)
+++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj (working copy)
@@ -5,7 +5,7 @@
options {
STATIC=false;
JAVA_UNICODE_ESCAPE=true;
- USER_CHAR_STREAM=false;
+ USER_CHAR_STREAM=true;
IGNORE_CASE=false;
JDK_VERSION="1.5";
}
@@ -61,14 +61,14 @@
// syntax parser constructor
public StandardSyntaxParser() {
- this(new StringReader(""));
+ this(new FastCharStream(new StringReader("")));
}
/** Parses a query string, returning a {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode}.
* @param query the query string to be parsed.
* @throws ParseException if the parsing fails
*/
public QueryNode parse(CharSequence query, CharSequence field) throws QueryNodeParseException {
- ReInit(new StringReader(query.toString()));
+ ReInit(new FastCharStream(new StringReader(query.toString())));
try {
// TopLevelQuery is a Query followed by the end-of-input (EOF)
QueryNode querynode = TopLevelQuery(field);
Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java
===================================================================
--- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java (revision 1359190)
+++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java (working copy)
@@ -1,6 +1,6 @@
/* Generated By:JavaCC: Do not edit this line. StandardSyntaxParserTokenManager.java */
package org.apache.lucene.queryparser.flexible.standard.parser;
-/*
+/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -781,25 +781,23 @@
static final long[] jjtoSkip = {
0x80L,
};
-protected JavaCharStream input_stream;
+protected CharStream input_stream;
private final int[] jjrounds = new int[33];
private final int[] jjstateSet = new int[66];
protected char curChar;
/** Constructor. */
-public StandardSyntaxParserTokenManager(JavaCharStream stream){
- if (JavaCharStream.staticFlag)
- throw new Error("ERROR: Cannot use a static CharStream class with a non-static lexical analyzer.");
+public StandardSyntaxParserTokenManager(CharStream stream){
input_stream = stream;
}
/** Constructor. */
-public StandardSyntaxParserTokenManager(JavaCharStream stream, int lexState){
+public StandardSyntaxParserTokenManager(CharStream stream, int lexState){
this(stream);
SwitchTo(lexState);
}
/** Reinitialise parser. */
-public void ReInit(JavaCharStream stream)
+public void ReInit(CharStream stream)
{
jjmatchedPos = jjnewStateCnt = 0;
curLexState = defaultLexState;
@@ -815,7 +813,7 @@
}
/** Reinitialise parser. */
-public void ReInit(JavaCharStream stream, int lexState)
+public void ReInit(CharStream stream, int lexState)
{
ReInit(stream);
SwitchTo(lexState);
Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/Token.java
===================================================================
--- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/Token.java (revision 1359190)
+++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/Token.java (working copy)
@@ -121,4 +121,4 @@
}
}
-/* JavaCC - OriginalChecksum=e9c55091ec11152bcd3a300ddff5c73a (do not edit this line) */
+/* JavaCC - OriginalChecksum=3b4fe6dcfcfa24a81f1c6ceffae5f73a (do not edit this line) */
Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/TokenMgrError.java
===================================================================
--- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/TokenMgrError.java (revision 1359190)
+++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/TokenMgrError.java (working copy)
@@ -138,4 +138,4 @@
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
-/* JavaCC - OriginalChecksum=76b513fd9c50f65248056bbeeff49277 (do not edit this line) */
+/* JavaCC - OriginalChecksum=1efb3d906925f2478637c66473b79bae (do not edit this line) */
Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/LowercaseExpandedTermsQueryNodeProcessor.java
===================================================================
--- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/LowercaseExpandedTermsQueryNodeProcessor.java (revision 1359190)
+++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/LowercaseExpandedTermsQueryNodeProcessor.java (working copy)
@@ -18,6 +18,7 @@
*/
import java.util.List;
+import java.util.Locale;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler;
@@ -46,7 +47,6 @@
QueryNodeProcessorImpl {
public LowercaseExpandedTermsQueryNodeProcessor() {
- // empty constructor
}
@Override
@@ -63,6 +63,11 @@
@Override
protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException {
+
+ Locale locale = getQueryConfigHandler().get(ConfigurationKeys.LOCALE);
+ if (locale == null) {
+ locale = Locale.getDefault();
+ }
if (node instanceof WildcardQueryNode
|| node instanceof FuzzyQueryNode
@@ -71,7 +76,7 @@
TextableQueryNode txtNode = (TextableQueryNode) node;
CharSequence text = txtNode.getText();
- txtNode.setText(text != null ? UnescapedCharSequence.toLowerCase(text) : null);
+ txtNode.setText(text != null ? UnescapedCharSequence.toLowerCase(text, locale) : null);
}
return node;
Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/StandardQueryNodeProcessorPipeline.java
===================================================================
--- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/StandardQueryNodeProcessorPipeline.java (revision 1359190)
+++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/StandardQueryNodeProcessorPipeline.java (working copy)
@@ -17,12 +17,15 @@
* limitations under the License.
*/
+import java.util.Locale;
+
import org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler;
import org.apache.lucene.queryparser.flexible.core.processors.NoChildOptimizationQueryNodeProcessor;
import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorPipeline;
import org.apache.lucene.queryparser.flexible.core.processors.RemoveDeletedQueryNodesProcessor;
import org.apache.lucene.queryparser.flexible.standard.builders.StandardQueryTreeBuilder;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler;
+import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys;
import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser;
import org.apache.lucene.search.Query;
Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/TermRangeQueryNodeProcessor.java
===================================================================
--- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/TermRangeQueryNodeProcessor.java (revision 1359190)
+++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/TermRangeQueryNodeProcessor.java (working copy)
@@ -22,6 +22,7 @@
import java.util.Date;
import java.util.List;
import java.util.Locale;
+import java.util.TimeZone;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
@@ -76,6 +77,12 @@
locale = Locale.getDefault();
}
+ TimeZone timeZone = getQueryConfigHandler().get(ConfigurationKeys.TIMEZONE);
+
+ if (timeZone == null) {
+ timeZone = TimeZone.getDefault();
+ }
+
CharSequence field = termRangeNode.getField();
String fieldStr = null;
@@ -114,7 +121,7 @@
// the time is set to the latest possible time of that date to
// really
// include all documents:
- Calendar cal = Calendar.getInstance(locale);
+ Calendar cal = Calendar.getInstance(timeZone, locale);
cal.setTime(d2);
cal.set(Calendar.HOUR_OF_DAY, 23);
cal.set(Calendar.MINUTE, 59);
Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/StandardQueryParser.java
===================================================================
--- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/StandardQueryParser.java (revision 1359190)
+++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/StandardQueryParser.java (working copy)
@@ -19,6 +19,7 @@
import java.util.Locale;
import java.util.Map;
+import java.util.TimeZone;
import java.util.TooManyListenersException;
import org.apache.lucene.analysis.Analyzer;
@@ -344,6 +345,14 @@
return getQueryConfigHandler().get(ConfigurationKeys.LOCALE);
}
+ public void setTimeZone(TimeZone timeZone) { // stores the time zone in the query config under ConfigurationKeys.TIMEZONE
+ getQueryConfigHandler().set(ConfigurationKeys.TIMEZONE, timeZone);
+ }
+
+ public TimeZone getTimeZone() { // reads ConfigurationKeys.TIMEZONE from the query config; presumably null when never set (TODO confirm)
+ return getQueryConfigHandler().get(ConfigurationKeys.TIMEZONE);
+ }
+
/**
* Sets the default slop for phrases. If zero, then exact phrase matches are
* required. Default value is zero.
Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/FastCharStream.java
===================================================================
--- lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/FastCharStream.java (revision 1359190)
+++ lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/FastCharStream.java (working copy)
@@ -96,7 +96,6 @@
try {
input.close();
} catch (IOException e) {
- System.err.println("Caught: " + e + "; ignoring.");
}
}
Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java
===================================================================
--- lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java (revision 1359190)
+++ lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java (working copy)
@@ -11,7 +11,7 @@
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
-import java.io.ByteArrayOutputStream;
+import java.io.StringWriter;
import java.io.IOException;
import java.io.InputStream;
import java.util.Enumeration;
@@ -98,10 +98,11 @@
*/
public static String getQueryAsXmlString(Properties formProperties, Templates template)
throws ParserConfigurationException, TransformerException {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- StreamResult result = new StreamResult(baos);
+ // TODO: Suppress XML header with encoding (as Strings have no encoding)
+ StringWriter writer = new StringWriter();
+ StreamResult result = new StreamResult(writer);
transformCriteria(formProperties, template, result);
- return baos.toString();
+ return writer.toString();
}
/**
@@ -109,10 +110,11 @@
*/
public static String getQueryAsXmlString(Properties formProperties, InputStream xslIs)
throws SAXException, IOException, ParserConfigurationException, TransformerException {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- StreamResult result = new StreamResult(baos);
+ // TODO: Suppress XML header with encoding (as Strings have no encoding)
+ StringWriter writer = new StringWriter();
+ StreamResult result = new StreamResult(writer);
transformCriteria(formProperties, xslIs, result);
- return baos.toString();
+ return writer.toString();
}
Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java
===================================================================
--- lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java (revision 1359190)
+++ lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java (working copy)
@@ -17,6 +17,8 @@
* limitations under the License.
*/
+import java.util.Locale;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
@@ -70,7 +72,7 @@
ExtendableQueryParser parser = (ExtendableQueryParser) getParser(null,
ext);
String field = ext.buildExtensionField("testExt", "aField");
- Query query = parser.parse(String.format("%s:foo bar", field));
+ Query query = parser.parse(String.format(Locale.ROOT, "%s:foo bar", field));
assertTrue("expected instance of BooleanQuery but was "
+ query.getClass(), query instanceof BooleanQuery);
BooleanQuery bquery = (BooleanQuery) query;
@@ -102,7 +104,7 @@
ExtendableQueryParser parser = (ExtendableQueryParser) getParser(null,
ext);
String field = ext.buildExtensionField("testExt");
- Query parse = parser.parse(String.format("%s:\"foo \\& bar\"", field));
+ Query parse = parser.parse(String.format(Locale.ROOT, "%s:\"foo \\& bar\"", field));
assertTrue("expected instance of TermQuery but was " + parse.getClass(),
parse instanceof TermQuery);
TermQuery tquery = (TermQuery) parse;
@@ -122,7 +124,7 @@
ExtendableQueryParser parser = (ExtendableQueryParser) getParser(null,
ext);
String field = ext.buildExtensionField("testExt", "afield");
- Query parse = parser.parse(String.format("%s:\"foo \\& bar\"", field));
+ Query parse = parser.parse(String.format(Locale.ROOT, "%s:\"foo \\& bar\"", field));
assertTrue("expected instance of TermQuery but was " + parse.getClass(),
parse instanceof TermQuery);
TermQuery tquery = (TermQuery) parse;
Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java
===================================================================
--- lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java (revision 1359190)
+++ lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java (working copy)
@@ -24,7 +24,9 @@
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
+import java.util.Locale;
import java.util.Map;
+import java.util.TimeZone;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -385,14 +387,16 @@
}
public String getDate(String s) throws Exception {
- DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
+ // we use the default Locale since LuceneTestCase randomizes it
+ DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, Locale.getDefault());
return DateTools.dateToString(df.parse(s), DateTools.Resolution.DAY);
}
private String getLocalizedDate(int year, int month, int day,
boolean extendLastDate) {
- DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
- Calendar calendar = new GregorianCalendar();
+ // we use the default Locale/TZ since LuceneTestCase randomizes it
+ DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, Locale.getDefault());
+ Calendar calendar = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
calendar.set(year, month, day);
if (extendLastDate) {
calendar.set(Calendar.HOUR_OF_DAY, 23);
@@ -406,7 +410,8 @@
public void testDateRange() throws Exception {
String startDate = getLocalizedDate(2002, 1, 1, false);
String endDate = getLocalizedDate(2002, 1, 4, false);
- Calendar endDateExpected = new GregorianCalendar();
+ // we use the default Locale/TZ since LuceneTestCase randomizes it
+ Calendar endDateExpected = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
endDateExpected.set(2002, 1, 4, 23, 59, 59);
endDateExpected.set(Calendar.MILLISECOND, 999);
final String defaultField = "default";
@@ -441,7 +446,8 @@
/** for testing DateTools support */
private String getDate(String s, DateTools.Resolution resolution) throws Exception {
- DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
+ // we use the default Locale since LuceneTestCase randomizes it
+ DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, Locale.getDefault());
return getDate(df.parse(s), resolution);
}
Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java
===================================================================
--- lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java (revision 1359190)
+++ lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java (working copy)
@@ -27,6 +27,7 @@
import java.util.List;
import java.util.Locale;
import java.util.Map;
+import java.util.TimeZone;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -675,7 +676,8 @@
/** for testing DateTools support */
private String getDate(String s, DateTools.Resolution resolution)
throws Exception {
- DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
+ // we use the default Locale since LuceneTestCase randomizes it
+ DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, Locale.getDefault());
return getDate(df.parse(s), resolution);
}
@@ -693,8 +695,9 @@
}
private String getLocalizedDate(int year, int month, int day) {
- DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
- Calendar calendar = new GregorianCalendar();
+ // we use the default Locale/TZ since LuceneTestCase randomizes it
+ DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, Locale.getDefault());
+ Calendar calendar = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
calendar.clear();
calendar.set(year, month, day);
calendar.set(Calendar.HOUR_OF_DAY, 23);
@@ -707,7 +710,8 @@
public void testDateRange() throws Exception {
String startDate = getLocalizedDate(2002, 1, 1);
String endDate = getLocalizedDate(2002, 1, 4);
- Calendar endDateExpected = new GregorianCalendar();
+ // we use the default Locale/TZ since LuceneTestCase randomizes it
+ Calendar endDateExpected = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
endDateExpected.clear();
endDateExpected.set(2002, 1, 4, 23, 59, 59);
endDateExpected.set(Calendar.MILLISECOND, 999);
Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
===================================================================
--- lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java (revision 1359190)
+++ lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java (working copy)
@@ -24,6 +24,7 @@
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.Locale;
+import java.util.TimeZone;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -581,7 +582,8 @@
/** for testing DateTools support */
private String getDate(String s, DateTools.Resolution resolution) throws Exception {
- DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
+ // we use the default Locale since LuceneTestCase randomizes it
+ DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, Locale.getDefault());
return getDate(df.parse(s), resolution);
}
@@ -591,8 +593,9 @@
}
private String getLocalizedDate(int year, int month, int day) {
- DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
- Calendar calendar = new GregorianCalendar();
+ // we use the default Locale/TZ since LuceneTestCase randomizes it
+ DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, Locale.getDefault());
+ Calendar calendar = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
calendar.clear();
calendar.set(year, month, day);
calendar.set(Calendar.HOUR_OF_DAY, 23);
@@ -605,7 +608,8 @@
public void testDateRange() throws Exception {
String startDate = getLocalizedDate(2002, 1, 1);
String endDate = getLocalizedDate(2002, 1, 4);
- Calendar endDateExpected = new GregorianCalendar();
+ // we use the default Locale/TZ since LuceneTestCase randomizes it
+ Calendar endDateExpected = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
endDateExpected.clear();
endDateExpected.set(2002, 1, 4, 23, 59, 59);
endDateExpected.set(Calendar.MILLISECOND, 999);
Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java
===================================================================
--- lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java (revision 1359190)
+++ lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java (working copy)
@@ -203,7 +203,7 @@
private static Document getDocumentFromString(String str)
throws SAXException, IOException, ParserConfigurationException {
- InputStream is = new ByteArrayInputStream(str.getBytes());
+ InputStream is = new ByteArrayInputStream(str.getBytes("UTF-8"));
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
DocumentBuilder builder = factory.newDocumentBuilder();
Index: lucene/spatial
===================================================================
--- lucene/spatial (revision 1359190)
+++ lucene/spatial (working copy)
Property changes on: lucene/spatial
___________________________________________________________________
Added: svn:mergeinfo
Merged /lucene/dev/branches/lucene4199/lucene/spatial:r1358548-1359191
Merged /lucene/dev/branches/lucene3969/lucene/spatial:r1311219-1324948
Merged /lucene/dev/branches/branch_3x/lucene/spatial:r1232954,1302749,1302808,1303007,1303023,1303269,1303733,1303854,1304295,1304360,1304660,1304904,1305074,1305142,1305681,1305693,1305719,1305741,1305816,1305837,1306929,1307050
Merged /lucene/dev/branches/branch_4x/lucene/spatial:r1344391,1344929,1348012,1348274,1348293,1348919,1348951,1349048,1349340,1349446,1349991,1353701,1355203,1356608
Merged /lucene/dev/branches/lucene4055/lucene/spatial:r1338960-1343359
Index: lucene/spatial/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java (revision 1359190)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java (working copy)
@@ -122,7 +122,7 @@
ff.setIndexOptions(IndexOptions.DOCS_ONLY);
ff.freeze();
- NumberFormat nf = NumberFormat.getInstance( Locale.US );
+ NumberFormat nf = NumberFormat.getInstance( Locale.ROOT );
nf.setMaximumFractionDigits( 5 );
nf.setMinimumFractionDigits( 5 );
nf.setGroupingUsed(false);
Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java (revision 1359190)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java (working copy)
@@ -28,6 +28,7 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
+import java.util.Locale;
/**
* @lucene.experimental
@@ -107,7 +108,7 @@
}
public void printInfo() {
- NumberFormat nf = NumberFormat.getNumberInstance();
+ NumberFormat nf = NumberFormat.getNumberInstance(Locale.ROOT);
nf.setMaximumFractionDigits(5);
nf.setMinimumFractionDigits(5);
nf.setMinimumIntegerDigits(3);
Index: lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgs.java
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgs.java (revision 1359190)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgs.java (working copy)
@@ -17,6 +17,8 @@
* limitations under the License.
*/
+import java.util.Locale;
+
import com.spatial4j.core.exception.InvalidSpatialArgument;
import com.spatial4j.core.shape.Shape;
@@ -64,7 +66,7 @@
if (max != null) {
str.append(" max=").append(max);
}
- str.append(" distPrec=").append(String.format("%.2f%%", distPrecision / 100d));
+ str.append(" distPrec=").append(String.format(Locale.ROOT, "%.2f%%", distPrecision / 100d));
str.append(')');
return str.toString();
}
Index: lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialOperation.java
===================================================================
--- lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialOperation.java (revision 1359190)
+++ lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialOperation.java (working copy)
@@ -61,14 +61,14 @@
this.sourceNeedsArea = sourceNeedsArea;
this.targetNeedsArea = targetNeedsArea;
registry.put(name, this);
- registry.put(name.toUpperCase(Locale.US), this);
+ registry.put(name.toUpperCase(Locale.ROOT), this);
list.add( this );
}
public static SpatialOperation get( String v ) {
SpatialOperation op = registry.get( v );
if( op == null ) {
- op = registry.get(v.toUpperCase(Locale.US));
+ op = registry.get(v.toUpperCase(Locale.ROOT));
}
if( op == null ) {
throw new InvalidSpatialArgument("Unknown Operation: " + v );
Index: lucene/suggest
===================================================================
--- lucene/suggest (revision 1359190)
+++ lucene/suggest (working copy)
Property changes on: lucene/suggest
___________________________________________________________________
Added: svn:mergeinfo
Merged /lucene/dev/branches/lucene3969/lucene/suggest:r1311219-1324948
Merged /lucene/dev/branches/branch_3x/lucene/suggest:r1232954,1302749,1302808,1303007,1303023,1303269,1303733,1303854,1304295,1304360,1304660,1304904,1305074,1305142,1305681,1305693,1305719,1305741,1305816,1305837,1306929,1307050
Merged /lucene/dev/branches/branch_4x/lucene/suggest:r1344391,1344929,1348012,1348274,1348293,1348919,1348951,1349048,1349340,1349446,1349991,1353701,1355203,1356608
Merged /lucene/dev/branches/lucene4055/lucene/suggest:r1338960-1343359
Merged /lucene/dev/branches/lucene4199/lucene/suggest:r1358548-1359191
Index: lucene/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java
===================================================================
--- lucene/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java (revision 1359190)
+++ lucene/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java (working copy)
@@ -320,7 +320,7 @@
return new SuggestWord[0];
if (lowerCaseTerms) {
- term = new Term(term.field(), text.toLowerCase(Locale.ENGLISH));
+ term = new Term(term.field(), text.toLowerCase(Locale.ROOT));
}
int docfreq = ir.docFreq(term);
Index: lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java
===================================================================
--- lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java (revision 1359190)
+++ lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java (working copy)
@@ -38,12 +38,12 @@
private BufferedReader in;
- public PlainTextDictionary(File file) throws FileNotFoundException {
- in = new BufferedReader(new FileReader(file));
+ public PlainTextDictionary(File file) throws IOException {
+ in = new BufferedReader(IOUtils.getDecodingReader(file, IOUtils.CHARSET_UTF_8));
}
public PlainTextDictionary(InputStream dictFile) {
- in = new BufferedReader(new InputStreamReader(dictFile));
+ in = new BufferedReader(IOUtils.getDecodingReader(dictFile, IOUtils.CHARSET_UTF_8));
}
/**
Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
===================================================================
--- lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java (revision 1359190)
+++ lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java (working copy)
@@ -42,7 +42,7 @@
private boolean done = false;
public FileDictionary(InputStream dictFile) {
- in = new BufferedReader(new InputStreamReader(dictFile));
+ in = new BufferedReader(IOUtils.getDecodingReader(dictFile, IOUtils.CHARSET_UTF_8));
}
/**
Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
===================================================================
--- lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java (revision 1359190)
+++ lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java (working copy)
@@ -132,7 +132,7 @@
@Override
public String toString() {
- return String.format(Locale.ENGLISH,
+ return String.format(Locale.ROOT,
"time=%.2f sec. total (%.2f reading, %.2f sorting, %.2f merging), lines=%d, temp files=%d, merges=%d, soft ram limit=%.2f MB",
totalTime / 1000.0d, readTime / 1000.0d, sortTime / 1000.0d, mergeTime / 1000.0d,
lines, tempMergeFiles, mergeRounds,
Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java
===================================================================
--- lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java (revision 1359190)
+++ lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java (working copy)
@@ -35,9 +35,12 @@
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.List;
+import java.util.Locale;
import java.util.Vector;
import java.util.zip.GZIPInputStream;
+import org.apache.lucene.util.IOUtils;
+
/**
* Implementation of a Ternary Search Trie, a data structure for storing
* String objects that combines the compact size of a binary search
@@ -147,13 +150,20 @@
/** The base node in the trie. */
private TSTNode rootNode;
+
+ private final Locale locale;
/**
* Constructs an empty Ternary Search Trie.
*/
public JaspellTernarySearchTrie() {
+ this(Locale.ROOT);
}
+ public JaspellTernarySearchTrie(Locale locale) {
+ this.locale = locale;
+ }
+
// for loading
void setRoot(TSTNode newRoot) {
rootNode = newRoot;
@@ -196,10 +206,10 @@
this();
BufferedReader in;
if (compression)
- in = new BufferedReader(new InputStreamReader(new GZIPInputStream(
- new FileInputStream(file))));
- else in = new BufferedReader(new InputStreamReader((new FileInputStream(
- file))));
+ in = new BufferedReader(IOUtils.getDecodingReader(new GZIPInputStream(
+ new FileInputStream(file)), IOUtils.CHARSET_UTF_8));
+ else in = new BufferedReader(IOUtils.getDecodingReader((new FileInputStream(
+ file)), IOUtils.CHARSET_UTF_8));
String word;
int pos;
Float occur, one = new Float(1);
@@ -212,7 +222,7 @@
occur = Float.parseFloat(word.substring(pos + 1).trim());
word = word.substring(0, pos);
}
- String key = word.toLowerCase();
+ String key = word.toLowerCase(locale);
if (rootNode == null) {
rootNode = new TSTNode(key.charAt(0), null);
}
@@ -242,7 +252,7 @@
if (occur2 != null) {
occur += occur2.floatValue();
}
- currentNode = getOrCreateNode(word.trim().toLowerCase());
+ currentNode = getOrCreateNode(word.trim().toLowerCase(locale));
currentNode.data = occur;
}
}
@@ -382,7 +392,7 @@
*@return The Float retrieved from the Ternary Search Trie.
*/
public Float getAndIncrement(String key) {
- String key2 = key.trim().toLowerCase();
+ String key2 = key.trim().toLowerCase(locale);
TSTNode node = getNode(key2);
if (node == null) {
return null;
@@ -763,7 +773,7 @@
* the Trie.
*/
public void remove(String key) {
- deleteNode(getNode(key.trim().toLowerCase()));
+ deleteNode(getNode(key.trim().toLowerCase(locale)));
}
/**
Index: lucene/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java
===================================================================
--- lucene/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java (revision 1359190)
+++ lucene/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java (working copy)
@@ -22,6 +22,7 @@
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
+import java.util.Locale;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
@@ -435,8 +436,8 @@
executor.awaitTermination(60L, TimeUnit.SECONDS);
for (int i = 0; i < workers.length; i++) {
- assertFalse(String.format("worker thread %d failed", i), workers[i].failed);
- assertTrue(String.format("worker thread %d is still running but should be terminated", i), workers[i].terminated);
+ assertFalse(String.format(Locale.ROOT, "worker thread %d failed", i), workers[i].failed);
+ assertTrue(String.format(Locale.ROOT, "worker thread %d is still running but should be terminated", i), workers[i].terminated);
}
// 4 searchers more than iterations
// 1. at creation
Index: lucene/suggest/src/test/org/apache/lucene/search/suggest/Average.java
===================================================================
--- lucene/suggest/src/test/org/apache/lucene/search/suggest/Average.java (revision 1359190)
+++ lucene/suggest/src/test/org/apache/lucene/search/suggest/Average.java (working copy)
@@ -47,7 +47,7 @@
public String toString()
{
- return String.format(Locale.ENGLISH, "%.0f [+- %.2f]",
+ return String.format(Locale.ROOT, "%.0f [+- %.2f]",
avg, stddev);
}
Index: lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java
===================================================================
--- lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java (revision 1359190)
+++ lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java (working copy)
@@ -233,9 +233,9 @@
StringBuilder b = new StringBuilder();
String format = "%" + colLen + "s " + "%" + colLen + "s\n";
- b.append(String.format(Locale.ENGLISH, format, "Expected", "Result"));
+ b.append(String.format(Locale.ROOT, format, "Expected", "Result"));
for (int i = 0; i < Math.max(result.length, expected.length); i++) {
- b.append(String.format(Locale.ENGLISH, format,
+ b.append(String.format(Locale.ROOT, format,
i < expected.length ? expected[i] : "--",
i < result.length ? result[i] : "--"));
}
Index: lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java
===================================================================
--- lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java (revision 1359190)
+++ lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java (working copy)
@@ -119,7 +119,7 @@
});
System.err.println(
- String.format(Locale.ENGLISH, "%-15s input: %d, time[ms]: %s",
+ String.format(Locale.ROOT, "%-15s input: %d, time[ms]: %s",
cls.getSimpleName(),
dictionaryInput.length,
result.average.toString()));
@@ -134,7 +134,7 @@
for (Class<? extends Lookup> cls : benchmarkClasses) {
Lookup lookup = buildLookup(cls, dictionaryInput);
System.err.println(
- String.format(Locale.ENGLISH, "%-15s size[B]:%,13d",
+ String.format(Locale.ROOT, "%-15s size[B]:%,13d",
lookup.getClass().getSimpleName(),
RamUsageEstimator.sizeOf(lookup)));
}
@@ -181,7 +181,7 @@
*/
public void runPerformanceTest(final int minPrefixLen, final int maxPrefixLen,
final int num, final boolean onlyMorePopular) throws Exception {
- System.err.println(String.format(Locale.ENGLISH,
+ System.err.println(String.format(Locale.ROOT,
"-- prefixes: %d-%d, num: %d, onlyMorePopular: %s",
minPrefixLen, maxPrefixLen, num, onlyMorePopular));
@@ -206,7 +206,7 @@
});
System.err.println(
- String.format(Locale.ENGLISH, "%-15s queries: %d, time[ms]: %s, ~kQPS: %.0f",
+ String.format(Locale.ROOT, "%-15s queries: %d, time[ms]: %s, ~kQPS: %.0f",
lookup.getClass().getSimpleName(),
input.size(),
result.average.toString(),
Index: lucene/test-framework
===================================================================
--- lucene/test-framework (revision 1359190)
+++ lucene/test-framework (working copy)
Property changes on: lucene/test-framework
___________________________________________________________________
Modified: svn:mergeinfo
Merged /lucene/dev/branches/lucene4199/lucene/test-framework:r1358548-1359191
Index: lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (revision 1359190)
+++ lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (working copy)
@@ -595,7 +595,7 @@
} else {
// TODO: we can make ascii easier to read if we
// don't escape...
- sb.append(String.format("\\u%04x", c));
+ sb.append(String.format(Locale.ROOT, "\\u%04x", c));
}
charUpto += Character.charCount(c);
}
Index: lucene/test-framework/src/java/org/apache/lucene/index/AlcoholicMergePolicy.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/index/AlcoholicMergePolicy.java (revision 1359190)
+++ lucene/test-framework/src/java/org/apache/lucene/index/AlcoholicMergePolicy.java (working copy)
@@ -20,6 +20,7 @@
import java.io.IOException;
import java.util.Calendar;
import java.util.GregorianCalendar;
+import java.util.Locale;
import java.util.Random;
import java.util.TimeZone;
@@ -45,7 +46,7 @@
private final Calendar calendar;
public AlcoholicMergePolicy(TimeZone tz, Random random) {
- this.calendar = new GregorianCalendar(tz);
+ this.calendar = new GregorianCalendar(tz, Locale.ROOT);
this.random = random;
maxMergeSize = _TestUtil.nextInt(random, 1024*1024, Integer.MAX_VALUE);
}
Index: lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java (revision 1359190)
+++ lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java (working copy)
@@ -71,7 +71,7 @@
codec = formats.get(Math.abs(perFieldSeed ^ name.hashCode()) % formats.size());
if (codec instanceof SimpleTextPostingsFormat && perFieldSeed % 5 != 0) {
// make simpletext rarer, choose again
- codec = formats.get(Math.abs(perFieldSeed ^ name.toUpperCase(Locale.ENGLISH).hashCode()) % formats.size());
+ codec = formats.get(Math.abs(perFieldSeed ^ name.toUpperCase(Locale.ROOT).hashCode()) % formats.size());
}
previousMappings.put(name, codec);
// Safety:
Index: lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java (revision 1359190)
+++ lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java (working copy)
@@ -357,7 +357,7 @@
// - end with one of: "product of:", "sum of:", "max of:", or
// - have "max plus