Index: . =================================================================== --- . (revision 1359190) +++ . (working copy) Property changes on: . ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/lucene4199:r1358548-1359191 Index: lucene =================================================================== --- lucene (revision 1359190) +++ lucene (working copy) Property changes on: lucene ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/lucene4199/lucene:r1358548-1359191 Index: lucene/analysis =================================================================== --- lucene/analysis (revision 1359190) +++ lucene/analysis (working copy) Property changes on: lucene/analysis ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/lucene4199/lucene/analysis:r1358548-1359191 Index: lucene/analysis/common =================================================================== --- lucene/analysis/common (revision 1359190) +++ lucene/analysis/common (working copy) Property changes on: lucene/analysis/common ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/lucene4199/lucene/analysis/common:r1358548-1359191 Index: lucene/analysis/common/build.xml =================================================================== --- lucene/analysis/common/build.xml (revision 1359190) +++ lucene/analysis/common/build.xml (working copy) @@ -67,44 +67,50 @@ - + - - - - + + + - - - + + + + + + + + + + + + + + + Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java (revision 1359190) +++ 
lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java (working copy) @@ -1,5 +1,7 @@ package org.apache.lucene.analysis.br; +import java.util.Locale; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -21,6 +23,7 @@ * A stemmer for Brazilian Portuguese words. */ public class BrazilianStemmer { + private static final Locale locale = new Locale("pt", "BR"); /** * Changed term @@ -243,7 +246,7 @@ return null ; } - value = value.toLowerCase() ; + value = value.toLowerCase(locale) ; for (j=0 ; j < value.length() ; j++) { if ((value.charAt(j) == 'á') || (value.charAt(j) == 'â') || Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java (revision 1359190) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java (working copy) @@ -1,4 +1,7 @@ package org.apache.lucene.analysis.de; + +import java.util.Locale; + // This file is encoded in UTF-8 /* @@ -37,6 +40,8 @@ * Amount of characters that are removed with substitute() while stemming. */ private int substCount = 0; + + private static final Locale locale = new Locale("de", "DE"); /** * Stemms the given term to an unique discriminator. @@ -47,7 +52,7 @@ protected String stem( String term ) { // Use lowercase for medium stemming. - term = term.toLowerCase(); + term = term.toLowerCase(locale); if ( !isStemmable( term ) ) return term; // Reset the StringBuilder. 
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java (revision 1359190) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java (working copy) @@ -252,7 +252,7 @@ } String condition = ruleArgs[4]; - affix.setCondition(condition, String.format(conditionPattern, condition)); + affix.setCondition(condition, String.format(Locale.ROOT, conditionPattern, condition)); affix.setCrossProduct(crossProduct); List list = affixes.get(affix.getAppend()); @@ -376,7 +376,7 @@ Arrays.sort(wordForm.getFlags()); entry = line.substring(0, flagSep); if(ignoreCase) { - entry = entry.toLowerCase(Locale.ENGLISH); + entry = entry.toLowerCase(Locale.ROOT); } } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java (revision 1359190) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java (working copy) @@ -20,6 +20,7 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.Charset; import java.text.ParseException; import java.util.ArrayList; import java.util.Arrays; @@ -330,7 +331,7 @@ HunspellStemmer stemmer = new HunspellStemmer(dictionary); - Scanner scanner = new Scanner(System.in); + Scanner scanner = new Scanner(System.in, Charset.defaultCharset().name()); System.out.print("> "); while (scanner.hasNextLine()) { Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java =================================================================== --- 
lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java (revision 1359190) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java (working copy) @@ -20,6 +20,7 @@ import java.text.DateFormat; import java.text.ParseException; import java.util.Date; +import java.util.Locale; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.AttributeSource; @@ -37,10 +38,11 @@ protected CharTermAttribute termAtt; /** - * Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object. + * Uses {@link java.text.DateFormat#getDateInstance(int, Locale) DateFormat#getDateInstance(DateFormat.DEFAULT, Locale.ROOT)} as + * the {@link java.text.DateFormat} object. */ public DateRecognizerSinkFilter() { - this(DateFormat.getDateInstance()); + this(DateFormat.getDateInstance(DateFormat.DEFAULT, Locale.ROOT)); } public DateRecognizerSinkFilter(DateFormat dateFormat) { Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java (revision 1359190) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java (working copy) @@ -1,8 +1,8 @@ -/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/30/11 12:10 PM */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 16:59 */ package org.apache.lucene.analysis.standard; -/* +/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
@@ -33,8 +33,8 @@ /** * This class is a scanner generated by * JFlex 1.5.0-SNAPSHOT - * on 9/30/11 12:10 PM from the specification file - * /lucene/jflex/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex + * on 08.07.12 16:59 from the specification file + * C:/Users/Uwe Schindler/Projects/lucene/lucene4199/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex */ class ClassicTokenizerImpl implements StandardTokenizerInterface { @@ -383,15 +383,7 @@ this.zzReader = in; } - /** - * Creates a new scanner. - * There is also java.io.Reader version of this constructor. - * - * @param in the java.io.Inputstream to read input from. - */ - ClassicTokenizerImpl(java.io.InputStream in) { - this(new java.io.InputStreamReader(in)); - } + /** * Unpacks the compressed character translation table. Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java (revision 1359190) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java (working copy) @@ -1,8 +1,8 @@ -/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/30/11 12:10 PM */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 16:59 */ package org.apache.lucene.analysis.standard; -/* +/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -759,15 +759,7 @@ this.zzReader = in; } - /** - * Creates a new scanner. - * There is also java.io.Reader version of this constructor. - * - * @param in the java.io.Inputstream to read input from. 
- */ - public StandardTokenizerImpl(java.io.InputStream in) { - this(new java.io.InputStreamReader(in)); - } + /** * Unpacks the compressed character translation table. Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro (revision 1359190) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro (working copy) @@ -14,7 +14,7 @@ * limitations under the License. */ -// Generated using ICU4J 4.8.0.0 on Friday, September 30, 2011 4:10:42 PM UTC +// Generated using ICU4J 4.8.1.1 on Sunday, July 8, 2012 2:59:49 PM UTC // by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java (revision 1359190) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java (working copy) @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 3/18/12 12:05 PM */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 17:00 */ package org.apache.lucene.analysis.standard; @@ -3844,15 +3844,7 @@ this.zzReader = in; } - /** - * Creates a new scanner. - * There is also java.io.Reader version of this constructor. - * - * @param in the java.io.Inputstream to read input from. - */ - public UAX29URLEmailTokenizerImpl(java.io.InputStream in) { - this(new java.io.InputStreamReader(in)); - } + /** * Unpacks the compressed character translation table. 
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex (revision 1359190) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex (working copy) @@ -1,6 +1,6 @@ package org.apache.lucene.analysis.standard; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java (revision 1359190) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java (working copy) @@ -1,8 +1,8 @@ -/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 1/22/12 10:26 PM */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 17:00 */ package org.apache.lucene.analysis.wikipedia; -/* +/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
@@ -25,8 +25,8 @@ /** * This class is a scanner generated by * JFlex 1.5.0-SNAPSHOT - * on 1/22/12 10:26 PM from the specification file - * /home/rmuir/workspace/lucene-clean-trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex + * on 08.07.12 17:00 from the specification file + * C:/Users/Uwe Schindler/Projects/lucene/lucene4199/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex */ class WikipediaTokenizerImpl { @@ -519,15 +519,7 @@ this.zzReader = in; } - /** - * Creates a new scanner. - * There is also java.io.Reader version of this constructor. - * - * @param in the java.io.Inputstream to read input from. - */ - WikipediaTokenizerImpl(java.io.InputStream in) { - this(new java.io.InputStreamReader(in)); - } + /** * Unpacks the compressed character translation table. Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java (revision 1359190) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java (working copy) @@ -79,7 +79,7 @@ public boolean incrementToken() throws IOException { if (input.incrementToken()) { if (!keywordAttr.isKeyword()) { - final String term = termAtt.toString().toLowerCase(Locale.ENGLISH); + final String term = termAtt.toString().toLowerCase(Locale.ROOT); termAtt.setEmpty().append(term); } return true; Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java (revision 1359190) +++ 
lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java (working copy) @@ -27,7 +27,7 @@ public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase { public void test() throws IOException { - DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.US)); + DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.ROOT)); String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006 The dogs finally reacted on 7/12/2006"; TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)); TeeSinkTokenFilter.SinkTokenStream sink = tee.newSinkTokenStream(sinkFilter); Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java (revision 1359190) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java (working copy) @@ -18,6 +18,7 @@ import java.io.IOException; import java.io.StringReader; +import java.util.Locale; import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.core.LowerCaseFilter; @@ -164,7 +165,7 @@ TokenStream lowerCasing = new LowerCaseFilter(TEST_VERSION_CURRENT, source1); String[] lowerCaseTokens = new String[tokens1.length]; for (int i = 0; i < tokens1.length; i++) - lowerCaseTokens[i] = tokens1[i].toLowerCase(); + lowerCaseTokens[i] = tokens1[i].toLowerCase(Locale.ROOT); assertTokenStreamContents(lowerCasing, lowerCaseTokens); } @@ -180,7 +181,7 @@ StringBuilder buffer = new StringBuilder(); System.out.println("-----Tokens: " + tokCount[k] + "-----"); for (int i = 0; i < tokCount[k]; i++) { - buffer.append(English.intToEnglish(i).toUpperCase()).append(' '); + 
buffer.append(English.intToEnglish(i).toUpperCase(Locale.ROOT)).append(' '); } //make sure we produce the same tokens TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString())))); Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayIterator.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayIterator.java (revision 1359190) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayIterator.java (working copy) @@ -32,7 +32,8 @@ } public void testConsumeWordInstance() { - BreakIterator bi = BreakIterator.getWordInstance(); + // we use the default locale, as it's randomized by LuceneTestCase + BreakIterator bi = BreakIterator.getWordInstance(Locale.getDefault()); CharArrayIterator ci = CharArrayIterator.newWordInstance(); for (int i = 0; i < 10000; i++) { char text[] = _TestUtil.randomUnicodeString(random()).toCharArray(); @@ -43,7 +44,8 @@ /* run this to test if your JRE is buggy public void testWordInstanceJREBUG() { - BreakIterator bi = BreakIterator.getWordInstance(); + // we use the default locale, as it's randomized by LuceneTestCase + BreakIterator bi = BreakIterator.getWordInstance(Locale.getDefault()); Segment ci = new Segment(); for (int i = 0; i < 10000; i++) { char text[] = _TestUtil.randomUnicodeString(random).toCharArray(); @@ -60,7 +62,8 @@ } public void testConsumeSentenceInstance() { - BreakIterator bi = BreakIterator.getSentenceInstance(); + // we use the default locale, as it's randomized by LuceneTestCase + BreakIterator bi = BreakIterator.getSentenceInstance(Locale.getDefault()); CharArrayIterator ci = CharArrayIterator.newSentenceInstance(); for (int i = 0; i < 10000; i++) { char text[] = _TestUtil.randomUnicodeString(random()).toCharArray(); @@ -71,7 +74,8 @@ /* run this to test 
if your JRE is buggy public void testSentenceInstanceJREBUG() { - BreakIterator bi = BreakIterator.getSentenceInstance(); + // we use the default locale, as it's randomized by LuceneTestCase + BreakIterator bi = BreakIterator.getSentenceInstance(Locale.getDefault()); Segment ci = new Segment(); for (int i = 0; i < 10000; i++) { char text[] = _TestUtil.randomUnicodeString(random).toCharArray(); Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java (revision 1359190) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java (working copy) @@ -36,7 +36,7 @@ key[j] = (char)random().nextInt(127); } String keyStr = new String(key); - String hmapKey = ignoreCase ? keyStr.toLowerCase(Locale.ENGLISH) : keyStr; + String hmapKey = ignoreCase ? keyStr.toLowerCase(Locale.ROOT) : keyStr; int val = random().nextInt(); Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java (revision 1359190) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java (working copy) @@ -208,16 +208,16 @@ set.add(upper); } for (int i = 0; i < upperArr.length; i++) { - assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); - assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i])); + assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i])); + assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i])); } set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), false); for (String upper : upperArr) { set.add(upper); } for (int i = 0; i < 
upperArr.length; i++) { - assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); - assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i])); + assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i])); + assertFalse(String.format(Locale.ROOT, falsePos, lowerArr[i]), set.contains(lowerArr[i])); } } @@ -235,8 +235,8 @@ set.add(upper); } for (int i = 0; i < upperArr.length; i++) { - assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); - assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i])); + assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i])); + assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i])); } set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), false); @@ -244,8 +244,8 @@ set.add(upper); } for (int i = 0; i < upperArr.length; i++) { - assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i])); - assertFalse(String.format(falsePos, upperArr[i]), set + assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i])); + assertFalse(String.format(Locale.ROOT, falsePos, upperArr[i]), set .contains(lowerArr[i])); } } @@ -258,7 +258,7 @@ List stopwords = Arrays.asList(TEST_STOP_WORDS); List stopwordsUpper = new ArrayList(); for (String string : stopwords) { - stopwordsUpper.add(string.toUpperCase()); + stopwordsUpper.add(string.toUpperCase(Locale.ROOT)); } setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS)); setIngoreCase.add(Integer.valueOf(1)); @@ -305,7 +305,7 @@ List stopwords = Arrays.asList(TEST_STOP_WORDS); List stopwordsUpper = new ArrayList(); for (String string : stopwords) { - stopwordsUpper.add(string.toUpperCase()); + stopwordsUpper.add(string.toUpperCase(Locale.ROOT)); } setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS)); setIngoreCase.add(Integer.valueOf(1)); @@ -351,7 +351,7 @@ List stopwords = 
Arrays.asList(TEST_STOP_WORDS); List stopwordsUpper = new ArrayList(); for (String string : stopwords) { - stopwordsUpper.add(string.toUpperCase()); + stopwordsUpper.add(string.toUpperCase(Locale.ROOT)); } set.addAll(Arrays.asList(TEST_STOP_WORDS)); Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java (revision 1359190) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.Reader; import java.io.StringReader; +import java.util.Locale; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; @@ -53,7 +54,7 @@ // internal buffer size is 1024 make sure we have a surrogate pair right at the border builder.insert(1023, "\ud801\udc1c"); Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString())); - assertTokenStreamContents(tokenizer, builder.toString().toLowerCase().split(" ")); + assertTokenStreamContents(tokenizer, builder.toString().toLowerCase(Locale.ROOT).split(" ")); } /* @@ -70,7 +71,7 @@ } builder.append("\ud801\udc1cabc"); Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString())); - assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase()}); + assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT)}); } } @@ -84,7 +85,7 @@ builder.append("A"); } Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString())); - assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()}); + assertTokenStreamContents(tokenizer, new String[] 
{builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)}); } /* @@ -98,7 +99,7 @@ } builder.append("\ud801\udc1c"); Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString())); - assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()}); + assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)}); } // LUCENE-3642: normalize SMP->BMP and check that offsets are correct Index: lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java =================================================================== --- lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java (revision 1359190) +++ lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java (working copy) @@ -123,11 +123,11 @@ while (null != (line = reader.readLine())) { Matcher matcher = TLD_PATTERN_1.matcher(line); if (matcher.matches()) { - TLDs.add(matcher.group(1).toLowerCase(Locale.US)); + TLDs.add(matcher.group(1).toLowerCase(Locale.ROOT)); } else { matcher = TLD_PATTERN_2.matcher(line); if (matcher.matches()) { - TLDs.add(matcher.group(1).toLowerCase(Locale.US)); + TLDs.add(matcher.group(1).toLowerCase(Locale.ROOT)); } } } @@ -146,7 +146,7 @@ */ private void writeOutput(SortedSet ASCIITLDs) throws IOException { final DateFormat dateFormat = DateFormat.getDateTimeInstance - (DateFormat.FULL, DateFormat.FULL, Locale.US); + (DateFormat.FULL, DateFormat.FULL, Locale.ROOT); dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); final Writer writer = new OutputStreamWriter (new FileOutputStream(outputFile), "UTF-8"); Index: lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java 
=================================================================== --- lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java (revision 1359190) +++ lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java (working copy) @@ -64,7 +64,7 @@ // public void testCollationKeySort() throws Exception { Analyzer usAnalyzer = new ICUCollationKeyAnalyzer - (TEST_VERSION_CURRENT, Collator.getInstance(Locale.US)); + (TEST_VERSION_CURRENT, Collator.getInstance(Locale.ROOT)); Analyzer franceAnalyzer = new ICUCollationKeyAnalyzer (TEST_VERSION_CURRENT, Collator.getInstance(Locale.FRANCE)); Analyzer swedenAnalyzer = new ICUCollationKeyAnalyzer @@ -73,7 +73,7 @@ (TEST_VERSION_CURRENT, Collator.getInstance(new Locale("da", "dk"))); // The ICU Collator and java.text.Collator implementations differ in their - // orderings - "BFJHD" is the ordering for the ICU Collator for Locale.US. + // orderings - "BFJHD" is the ordering for the ICU Collator for Locale.ROOT. 
testCollationKeySort (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, "BFJHD", "ECAGI", "BJDFH", "BJDHF"); Index: lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateHTMLStripCharFilterSupplementaryMacros.java =================================================================== --- lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateHTMLStripCharFilterSupplementaryMacros.java (revision 1359190) +++ lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateHTMLStripCharFilterSupplementaryMacros.java (working copy) @@ -29,7 +29,7 @@ private static final UnicodeSet BMP = new UnicodeSet("[\u0000-\uFFFF]"); private static final String NL = System.getProperty("line.separator"); private static final DateFormat DATE_FORMAT = DateFormat.getDateTimeInstance - (DateFormat.FULL, DateFormat.FULL, Locale.US); + (DateFormat.FULL, DateFormat.FULL, Locale.ROOT); static { DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")); } Index: lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateJFlexSupplementaryMacros.java =================================================================== --- lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateJFlexSupplementaryMacros.java (revision 1359190) +++ lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateJFlexSupplementaryMacros.java (working copy) @@ -32,7 +32,7 @@ private static final UnicodeSet BMP = new UnicodeSet("[\u0000-\uFFFF]"); private static final String NL = System.getProperty("line.separator"); private static final DateFormat DATE_FORMAT = DateFormat.getDateTimeInstance - (DateFormat.FULL, DateFormat.FULL, Locale.US); + (DateFormat.FULL, DateFormat.FULL, Locale.ROOT); static { DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")); } Index: lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java =================================================================== --- 
lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java (revision 1359190) +++ lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java (working copy) @@ -607,7 +607,7 @@ private void doTestBocchan(int numIterations) throws Exception { LineNumberReader reader = new LineNumberReader(new InputStreamReader( - this.getClass().getResourceAsStream("bocchan.utf-8"))); + this.getClass().getResourceAsStream("bocchan.utf-8"), "UTF-8")); String line = reader.readLine(); reader.close(); Index: lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelStemmer.java =================================================================== --- lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelStemmer.java (revision 1359190) +++ lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelStemmer.java (working copy) @@ -65,7 +65,7 @@ DataInputStream in = null; try { in = new DataInputStream(new BufferedInputStream(stemmerTable)); - String method = in.readUTF().toUpperCase(Locale.ENGLISH); + String method = in.readUTF().toUpperCase(Locale.ROOT); if (method.indexOf('M') < 0) { return new org.egothor.stemmer.Trie(in); } else { Index: lucene/analysis/stempel/src/java/org/egothor/stemmer/Compile.java =================================================================== --- lucene/analysis/stempel/src/java/org/egothor/stemmer/Compile.java (revision 1359190) +++ lucene/analysis/stempel/src/java/org/egothor/stemmer/Compile.java (working copy) @@ -63,6 +63,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.io.LineNumberReader; +import java.util.Locale; import java.util.StringTokenizer; /** @@ -89,7 +90,7 @@ return; } - args[0].toUpperCase(); + args[0].toUpperCase(Locale.ROOT); backward = args[0].charAt(0) == '-'; int qq = (backward) ? 
1 : 0; @@ -127,7 +128,7 @@ new FileInputStream(args[i]), charset))); for (String line = in.readLine(); line != null; line = in.readLine()) { try { - line = line.toLowerCase(); + line = line.toLowerCase(Locale.ROOT); StringTokenizer st = new StringTokenizer(line); String stem = st.nextToken(); if (storeorig) { Index: lucene/analysis/stempel/src/java/org/egothor/stemmer/DiffIt.java =================================================================== --- lucene/analysis/stempel/src/java/org/egothor/stemmer/DiffIt.java (revision 1359190) +++ lucene/analysis/stempel/src/java/org/egothor/stemmer/DiffIt.java (working copy) @@ -55,9 +55,11 @@ package org.egothor.stemmer; import java.io.BufferedReader; -import java.io.FileReader; +import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStreamReader; import java.io.LineNumberReader; +import java.util.Locale; import java.util.StringTokenizer; /** @@ -95,10 +97,11 @@ // System.out.println("[" + args[i] + "]"); Diff diff = new Diff(ins, del, rep, nop); try { - in = new LineNumberReader(new BufferedReader(new FileReader(args[i]))); + String charset = System.getProperty("egothor.stemmer.charset", "UTF-8"); + in = new LineNumberReader(new BufferedReader(new InputStreamReader(new FileInputStream(args[i]), charset))); for (String line = in.readLine(); line != null; line = in.readLine()) { try { - line = line.toLowerCase(); + line = line.toLowerCase(Locale.ROOT); StringTokenizer st = new StringTokenizer(line); String stem = st.nextToken(); System.out.println(stem + " -a"); Index: lucene/analysis/stempel/src/test/org/egothor/stemmer/TestCompile.java =================================================================== --- lucene/analysis/stempel/src/test/org/egothor/stemmer/TestCompile.java (revision 1359190) +++ lucene/analysis/stempel/src/test/org/egothor/stemmer/TestCompile.java (working copy) @@ -60,12 +60,14 @@ import java.io.DataInputStream; import java.io.File; import java.io.FileInputStream; -import 
java.io.FileReader; import java.io.IOException; +import java.io.InputStreamReader; import java.io.LineNumberReader; import java.net.URI; +import java.util.Locale; import java.util.StringTokenizer; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; public class TestCompile extends LuceneTestCase { @@ -107,7 +109,7 @@ Trie trie; DataInputStream is = new DataInputStream(new BufferedInputStream( new FileInputStream(path))); - String method = is.readUTF().toUpperCase(); + String method = is.readUTF().toUpperCase(Locale.ROOT); if (method.indexOf('M') < 0) { trie = new Trie(is); } else { @@ -120,11 +122,11 @@ private static void assertTrie(Trie trie, String file, boolean usefull, boolean storeorig) throws Exception { LineNumberReader in = new LineNumberReader(new BufferedReader( - new FileReader(file))); + new InputStreamReader(new FileInputStream(file), IOUtils.CHARSET_UTF_8))); for (String line = in.readLine(); line != null; line = in.readLine()) { try { - line = line.toLowerCase(); + line = line.toLowerCase(Locale.ROOT); StringTokenizer st = new StringTokenizer(line); String stem = st.nextToken(); if (storeorig) { @@ -132,7 +134,7 @@ .getLastOnPath(stem); StringBuilder stm = new StringBuilder(stem); Diff.apply(stm, cmd); - assertEquals(stem.toLowerCase(), stm.toString().toLowerCase()); + assertEquals(stem.toLowerCase(Locale.ROOT), stm.toString().toLowerCase(Locale.ROOT)); } while (st.hasMoreTokens()) { String token = st.nextToken(); @@ -143,7 +145,7 @@ .getLastOnPath(token); StringBuilder stm = new StringBuilder(token); Diff.apply(stm, cmd); - assertEquals(stem.toLowerCase(), stm.toString().toLowerCase()); + assertEquals(stem.toLowerCase(Locale.ROOT), stm.toString().toLowerCase(Locale.ROOT)); } } catch (java.util.NoSuchElementException x) { // no base token (stem) on a line Index: lucene/benchmark =================================================================== --- lucene/benchmark (revision 1359190) +++ lucene/benchmark (working 
copy) Property changes on: lucene/benchmark ___________________________________________________________________ Added: svn:mergeinfo Merged /lucene/dev/branches/lucene4199/lucene/benchmark:r1358548-1359191 Merged /lucene/dev/branches/lucene3969/lucene/benchmark:r1311219-1324948 Merged /lucene/dev/branches/branch_3x/lucene/benchmark:r1232954,1302749,1302808,1303007,1303023,1303269,1303733,1303854,1304295,1304360,1304660,1304904,1305074,1305142,1305681,1305693,1305719,1305741,1305816,1305837,1306929,1307050 Merged /lucene/dev/branches/branch_4x/lucene/benchmark:r1344391,1344929,1348012,1348274,1348293,1348919,1348951,1349048,1349340,1349446,1349991,1353701,1355203,1356608 Merged /lucene/dev/branches/lucene4055/lucene/benchmark:r1338960-1343359 Index: lucene/benchmark/build.xml =================================================================== --- lucene/benchmark/build.xml (revision 1359190) +++ lucene/benchmark/build.xml (working copy) @@ -262,9 +262,11 @@ - - - + + + + + Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.benchmark.byTask.utils.Algorithm; import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.util.IOUtils; /** @@ -106,7 +107,7 @@ Benchmark benchmark = null; try { - benchmark = new Benchmark(new FileReader(algFile)); + benchmark = new Benchmark(IOUtils.getDecodingReader(algFile, IOUtils.CHARSET_UTF_8)); } catch (Exception e) { e.printStackTrace(); System.exit(1); Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java =================================================================== --- 
lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java (revision 0) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java (working copy) @@ -0,0 +1,112 @@ +/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */ +/* JavaCCOptions:STATIC=false */ +package org.apache.lucene.benchmark.byTask.feeds.demohtml; + +/** + * This interface describes a character stream that maintains line and + * column number positions of the characters. It also has the capability + * to backup the stream to some extent. An implementation of this + * interface is used in the TokenManager implementation generated by + * JavaCCParser. + * + * All the methods except backup can be implemented in any fashion. backup + * needs to be implemented correctly for the correct operation of the lexer. + * Rest of the methods are all used to get information like line number, + * column number and the String that constitutes a token and are not used + * by the lexer. Hence their implementation won't affect the generated lexer's + * operation. + */ + +public interface CharStream { + + /** + * Returns the next character from the selected input. The method + * of selecting the input is the responsibility of the class + * implementing this interface. Can throw any java.io.IOException. + */ + char readChar() throws java.io.IOException; + + /** + * Returns the column position of the character last read. + * @deprecated + * @see #getEndColumn + */ + int getColumn(); + + /** + * Returns the line number of the character last read. + * @deprecated + * @see #getEndLine + */ + int getLine(); + + /** + * Returns the column number of the last character for current token (being + * matched after the last call to BeginTOken). + */ + int getEndColumn(); + + /** + * Returns the line number of the last character for current token (being + * matched after the last call to BeginTOken). 
+ */ + int getEndLine(); + + /** + * Returns the column number of the first character for current token (being + * matched after the last call to BeginTOken). + */ + int getBeginColumn(); + + /** + * Returns the line number of the first character for current token (being + * matched after the last call to BeginTOken). + */ + int getBeginLine(); + + /** + * Backs up the input stream by amount steps. Lexer calls this method if it + * had already read some characters, but could not use them to match a + * (longer) token. So, they will be used again as the prefix of the next + * token and it is the implemetation's responsibility to do this right. + */ + void backup(int amount); + + /** + * Returns the next character that marks the beginning of the next token. + * All characters must remain in the buffer between two successive calls + * to this method to implement backup correctly. + */ + char BeginToken() throws java.io.IOException; + + /** + * Returns a string made up of characters from the marked token beginning + * to the current buffer position. Implementations have the choice of returning + * anything that they want to. For example, for efficiency, one might decide + * to just return null, which is a valid implementation. + */ + String GetImage(); + + /** + * Returns an array of characters that make up the suffix of length 'len' for + * the currently matched token. This is used to build up the matched string + * for use in actions in the case of MORE. A simple and inefficient + * implementation of this is as follows : + * + * { + * String t = GetImage(); + * return t.substring(t.length() - len, t.length()).toCharArray(); + * } + */ + char[] GetSuffix(int len); + + /** + * The lexer calls this function to indicate that it is done with the stream + * and hence implementations can free any resources held by this class. + * Again, the body of this function can be just empty and it will not + * affect the lexer's operation. 
+ */ + void Done(); + +} +/* JavaCC - OriginalChecksum=e26d9399cd34335f985e19c1fa86c11b (do not edit this line) */ Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java (revision 1359191) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java (working copy) Property changes on: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +Date Author Id Revision HeadURL \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java (revision 0) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java (working copy) @@ -0,0 +1,123 @@ +// FastCharStream.java +package org.apache.lucene.benchmark.byTask.feeds.demohtml; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +import java.io.*; + +/** An efficient implementation of JavaCC's CharStream interface.

Note that + * this does not do line-number counting, but instead keeps track of the + * character position of the token in the input, as required by Lucene's {@link + * org.apache.lucene.analysis.Token} API. + * */ +public final class FastCharStream implements CharStream { + char[] buffer = null; + + int bufferLength = 0; // end of valid chars + int bufferPosition = 0; // next char to read + + int tokenStart = 0; // offset in buffer + int bufferStart = 0; // position in file of buffer + + Reader input; // source of chars + + /** Constructs from a Reader. */ + public FastCharStream(Reader r) { + input = r; + } + + public final char readChar() throws IOException { + if (bufferPosition >= bufferLength) + refill(); + return buffer[bufferPosition++]; + } + + private final void refill() throws IOException { + int newPosition = bufferLength - tokenStart; + + if (tokenStart == 0) { // token won't fit in buffer + if (buffer == null) { // first time: alloc buffer + buffer = new char[2048]; + } else if (bufferLength == buffer.length) { // grow buffer + char[] newBuffer = new char[buffer.length*2]; + System.arraycopy(buffer, 0, newBuffer, 0, bufferLength); + buffer = newBuffer; + } + } else { // shift token to front + System.arraycopy(buffer, tokenStart, buffer, 0, newPosition); + } + + bufferLength = newPosition; // update state + bufferPosition = newPosition; + bufferStart += tokenStart; + tokenStart = 0; + + int charsRead = // fill space in buffer + input.read(buffer, newPosition, buffer.length-newPosition); + if (charsRead == -1) + throw new IOException("read past eof"); + else + bufferLength += charsRead; + } + + public final char BeginToken() throws IOException { + tokenStart = bufferPosition; + return readChar(); + } + + public final void backup(int amount) { + bufferPosition -= amount; + } + + public final String GetImage() { + return new String(buffer, tokenStart, bufferPosition - tokenStart); + } + + public final char[] GetSuffix(int len) { + char[] value = new 
char[len]; + System.arraycopy(buffer, bufferPosition - len, value, 0, len); + return value; + } + + public final void Done() { + try { + input.close(); + } catch (IOException e) { + } + } + + public final int getColumn() { + return bufferStart + bufferPosition; + } + public final int getLine() { + return 1; + } + public final int getEndColumn() { + return bufferStart + bufferPosition; + } + public final int getEndLine() { + return 1; + } + public final int getBeginColumn() { + return bufferStart + tokenStart; + } + public final int getBeginLine() { + return 1; + } +} Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java (revision 1359191) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java (working copy) Property changes on: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java ___________________________________________________________________ Added: cvs2svn:cvs-rev ## -0,0 +1 ## +1.3 \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Author Date Id Revision \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java (working copy) @@ -29,6 +29,10 @@ private MyPipedInputStream pipeInStream = null; private PipedOutputStream pipeOutStream = null; + public HTMLParser(Reader reader) { + this(new FastCharStream(reader)); + } + private 
class MyPipedInputStream extends PipedInputStream{ public MyPipedInputStream(){ @@ -227,7 +231,7 @@ Token t1, t2; boolean inImg = false; t1 = jj_consume_token(TagName); - String tagName = t1.image.toLowerCase(Locale.ENGLISH); + String tagName = t1.image.toLowerCase(Locale.ROOT); if(Tags.WS_ELEMS.contains(tagName) ) { addSpace(); } @@ -264,7 +268,7 @@ ) && t2 != null) { - currentMetaTag=t2.image.toLowerCase(Locale.ENGLISH); + currentMetaTag=t2.image.toLowerCase(Locale.ROOT); if(currentMetaTag != null && currentMetaContent != null) { addMetaTag(); } @@ -272,7 +276,7 @@ if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 != null) { - currentMetaContent=t2.image.toLowerCase(Locale.ENGLISH); + currentMetaContent=t2.image.toLowerCase(Locale.ROOT); if(currentMetaTag != null && currentMetaContent != null) { addMetaTag(); } @@ -464,7 +468,6 @@ /** Generated Token Manager. */ public HTMLParserTokenManager token_source; - SimpleCharStream jj_input_stream; /** Current token. */ public Token token; /** Next token. */ @@ -485,14 +488,9 @@ private boolean jj_rescan = false; private int jj_gc = 0; - /** Constructor with InputStream. */ - public HTMLParser(java.io.InputStream stream) { - this(stream, null); - } - /** Constructor with InputStream and supplied encoding */ - public HTMLParser(java.io.InputStream stream, String encoding) { - try { jj_input_stream = new SimpleCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); } - token_source = new HTMLParserTokenManager(jj_input_stream); + /** Constructor with user supplied CharStream. */ + public HTMLParser(CharStream stream) { + token_source = new HTMLParserTokenManager(stream); token = new Token(); jj_ntk = -1; jj_gen = 0; @@ -501,13 +499,8 @@ } /** Reinitialise. */ - public void ReInit(java.io.InputStream stream) { - ReInit(stream, null); - } - /** Reinitialise. 
*/ - public void ReInit(java.io.InputStream stream, String encoding) { - try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); } - token_source.ReInit(jj_input_stream); + public void ReInit(CharStream stream) { + token_source.ReInit(stream); token = new Token(); jj_ntk = -1; jj_gen = 0; @@ -515,28 +508,6 @@ for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); } - /** Constructor. */ - public HTMLParser(java.io.Reader stream) { - jj_input_stream = new SimpleCharStream(stream, 1, 1); - token_source = new HTMLParserTokenManager(jj_input_stream); - token = new Token(); - jj_ntk = -1; - jj_gen = 0; - for (int i = 0; i < 14; i++) jj_la1[i] = -1; - for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); - } - - /** Reinitialise. */ - public void ReInit(java.io.Reader stream) { - jj_input_stream.ReInit(stream, 1, 1); - token_source.ReInit(jj_input_stream); - token = new Token(); - jj_ntk = -1; - jj_gen = 0; - for (int i = 0; i < 14; i++) jj_la1[i] = -1; - for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); - } - /** Constructor with generated Token Manager. 
*/ public HTMLParser(HTMLParserTokenManager tm) { token_source = tm; @@ -631,7 +602,7 @@ return (jj_ntk = jj_nt.kind); } - private java.util.List jj_expentries = new java.util.ArrayList(); + private java.util.List jj_expentries = new java.util.ArrayList(); private int[] jj_expentry; private int jj_kind = -1; private int[] jj_lasttokens = new int[100]; @@ -691,7 +662,7 @@ jj_add_error_token(0, 0); int[][] exptokseq = new int[jj_expentries.size()][]; for (int i = 0; i < jj_expentries.size(); i++) { - exptokseq[i] = jj_expentries.get(i); + exptokseq[i] = (int[])jj_expentries.get(i); } return new ParseException(token, exptokseq, tokenImage); } Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj (working copy) @@ -22,6 +22,7 @@ //DEBUG_LOOKAHEAD = true; //DEBUG_TOKEN_MANAGER = true; UNICODE_INPUT = true; + USER_CHAR_STREAM=true; } PARSER_BEGIN(HTMLParser) @@ -56,6 +57,10 @@ private MyPipedInputStream pipeInStream = null; private PipedOutputStream pipeOutStream = null; + public HTMLParser(Reader reader) { + this(new FastCharStream(reader)); + } + private class MyPipedInputStream extends PipedInputStream{ public MyPipedInputStream(){ @@ -227,7 +232,7 @@ } { t1= { - String tagName = t1.image.toLowerCase(Locale.ENGLISH); + String tagName = t1.image.toLowerCase(Locale.ROOT); if(Tags.WS_ELEMS.contains(tagName) ) { addSpace(); } @@ -249,7 +254,7 @@ ) && t2 != null) { - currentMetaTag=t2.image.toLowerCase(Locale.ENGLISH); + currentMetaTag=t2.image.toLowerCase(Locale.ROOT); if(currentMetaTag != null && currentMetaContent != null) { addMetaTag(); } @@ -257,7 +262,7 @@ if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 != null) { - 
currentMetaContent=t2.image.toLowerCase(Locale.ENGLISH); + currentMetaContent=t2.image.toLowerCase(Locale.ROOT); if(currentMetaTag != null && currentMetaContent != null) { addMetaTag(); } Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParserTokenManager.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParserTokenManager.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParserTokenManager.java (working copy) @@ -464,7 +464,7 @@ } else { - int hiByte = (curChar >> 8); + int hiByte = (int)(curChar >> 8); int i1 = hiByte >> 6; long l1 = 1L << (hiByte & 077); int i2 = (curChar & 0xff) >> 6; @@ -569,7 +569,7 @@ } else { - int hiByte = (curChar >> 8); + int hiByte = (int)(curChar >> 8); int i1 = hiByte >> 6; long l1 = 1L << (hiByte & 077); int i2 = (curChar & 0xff) >> 6; @@ -670,7 +670,7 @@ } else { - int hiByte = (curChar >> 8); + int hiByte = (int)(curChar >> 8); int i1 = hiByte >> 6; long l1 = 1L << (hiByte & 077); int i2 = (curChar & 0xff) >> 6; @@ -766,7 +766,7 @@ } else { - int hiByte = (curChar >> 8); + int hiByte = (int)(curChar >> 8); int i1 = hiByte >> 6; long l1 = 1L << (hiByte & 077); int i2 = (curChar & 0xff) >> 6; @@ -892,7 +892,7 @@ } else { - int hiByte = (curChar >> 8); + int hiByte = (int)(curChar >> 8); int i1 = hiByte >> 6; long l1 = 1L << (hiByte & 077); int i2 = (curChar & 0xff) >> 6; @@ -1061,7 +1061,7 @@ } else { - int hiByte = (curChar >> 8); + int hiByte = (int)(curChar >> 8); int i1 = hiByte >> 6; long l1 = 1L << (hiByte & 077); int i2 = (curChar & 0xff) >> 6; @@ -1205,7 +1205,7 @@ } else { - int hiByte = (curChar >> 8); + int hiByte = (int)(curChar >> 8); int i1 = hiByte >> 6; long l1 = 1L << (hiByte & 077); int i2 = (curChar & 0xff) >> 6; @@ -1361,7 +1361,7 @@ } else { - int hiByte = (curChar >> 8); + int hiByte = (int)(curChar >> 8); int i1 
= hiByte >> 6; long l1 = 1L << (hiByte & 077); int i2 = (curChar & 0xff) >> 6; @@ -1441,25 +1441,23 @@ static final long[] jjtoSkip = { 0x400000L, }; -protected SimpleCharStream input_stream; +protected CharStream input_stream; private final int[] jjrounds = new int[28]; private final int[] jjstateSet = new int[56]; protected char curChar; /** Constructor. */ -public HTMLParserTokenManager(SimpleCharStream stream){ - if (SimpleCharStream.staticFlag) - throw new Error("ERROR: Cannot use a static CharStream class with a non-static lexical analyzer."); +public HTMLParserTokenManager(CharStream stream){ input_stream = stream; } /** Constructor. */ -public HTMLParserTokenManager(SimpleCharStream stream, int lexState){ +public HTMLParserTokenManager(CharStream stream, int lexState){ this(stream); SwitchTo(lexState); } /** Reinitialise parser. */ -public void ReInit(SimpleCharStream stream) +public void ReInit(CharStream stream) { jjmatchedPos = jjnewStateCnt = 0; curLexState = defaultLexState; @@ -1475,7 +1473,7 @@ } /** Reinitialise parser. 
*/ -public void ReInit(SimpleCharStream stream, int lexState) +public void ReInit(CharStream stream, int lexState) { ReInit(stream); SwitchTo(lexState); Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/ParseException.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/ParseException.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/ParseException.java (working copy) @@ -195,4 +195,4 @@ } } -/* JavaCC - OriginalChecksum=e5376178619291bc9d2c0c6647dc3cef (do not edit this line) */ +/* JavaCC - OriginalChecksum=e449d0e43f3d85deb1260a88b7e90fcd (do not edit this line) */ Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/SimpleCharStream.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/SimpleCharStream.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/SimpleCharStream.java (working copy) @@ -1,472 +0,0 @@ -/* Generated By:JavaCC: Do not edit this line. SimpleCharStream.java Version 4.1 */ -/* JavaCCOptions:STATIC=false */ -package org.apache.lucene.benchmark.byTask.feeds.demohtml; - -/** - * An implementation of interface CharStream, where the stream is assumed to - * contain only ASCII characters (without unicode processing). - */ - -public class SimpleCharStream -{ -/** Whether parser is static. */ - public static final boolean staticFlag = false; - int bufsize; - int available; - int tokenBegin; -/** Position in buffer. 
*/ - public int bufpos = -1; - protected int bufline[]; - protected int bufcolumn[]; - - protected int column = 0; - protected int line = 1; - - protected boolean prevCharIsCR = false; - protected boolean prevCharIsLF = false; - - protected java.io.Reader inputStream; - - protected char[] buffer; - protected int maxNextCharInd = 0; - protected int inBuf = 0; - protected int tabSize = 8; - - protected void setTabSize(int i) { tabSize = i; } - protected int getTabSize(int i) { return tabSize; } - - - protected void ExpandBuff(boolean wrapAround) - { - char[] newbuffer = new char[bufsize + 2048]; - int newbufline[] = new int[bufsize + 2048]; - int newbufcolumn[] = new int[bufsize + 2048]; - - try - { - if (wrapAround) - { - System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin); - System.arraycopy(buffer, 0, newbuffer, - bufsize - tokenBegin, bufpos); - buffer = newbuffer; - - System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin); - System.arraycopy(bufline, 0, newbufline, bufsize - tokenBegin, bufpos); - bufline = newbufline; - - System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin); - System.arraycopy(bufcolumn, 0, newbufcolumn, bufsize - tokenBegin, bufpos); - bufcolumn = newbufcolumn; - - maxNextCharInd = (bufpos += (bufsize - tokenBegin)); - } - else - { - System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin); - buffer = newbuffer; - - System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin); - bufline = newbufline; - - System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin); - bufcolumn = newbufcolumn; - - maxNextCharInd = (bufpos -= tokenBegin); - } - } - catch (Throwable t) - { - throw new Error(t.getMessage()); - } - - - bufsize += 2048; - available = bufsize; - tokenBegin = 0; - } - - protected void FillBuff() throws java.io.IOException - { - if (maxNextCharInd == available) - { - if (available == bufsize) - { - if (tokenBegin > 2048) - 
{ - bufpos = maxNextCharInd = 0; - available = tokenBegin; - } - else if (tokenBegin < 0) - bufpos = maxNextCharInd = 0; - else - ExpandBuff(false); - } - else if (available > tokenBegin) - available = bufsize; - else if ((tokenBegin - available) < 2048) - ExpandBuff(true); - else - available = tokenBegin; - } - - int i; - try { - if ((i = inputStream.read(buffer, maxNextCharInd, - available - maxNextCharInd)) == -1) - { - inputStream.close(); - throw new java.io.IOException(); - } - else - maxNextCharInd += i; - return; - } - catch(java.io.IOException e) { - --bufpos; - backup(0); - if (tokenBegin == -1) - tokenBegin = bufpos; - throw e; - } - } - -/** Start. */ - public char BeginToken() throws java.io.IOException - { - tokenBegin = -1; - char c = readChar(); - tokenBegin = bufpos; - - return c; - } - - protected void UpdateLineColumn(char c) - { - column++; - - if (prevCharIsLF) - { - prevCharIsLF = false; - line += (column = 1); - } - else if (prevCharIsCR) - { - prevCharIsCR = false; - if (c == '\n') - { - prevCharIsLF = true; - } - else - line += (column = 1); - } - - switch (c) - { - case '\r' : - prevCharIsCR = true; - break; - case '\n' : - prevCharIsLF = true; - break; - case '\t' : - column--; - column += (tabSize - (column % tabSize)); - break; - default : - break; - } - - bufline[bufpos] = line; - bufcolumn[bufpos] = column; - } - -/** Read a character. */ - public char readChar() throws java.io.IOException - { - if (inBuf > 0) - { - --inBuf; - - if (++bufpos == bufsize) - bufpos = 0; - - return buffer[bufpos]; - } - - if (++bufpos >= maxNextCharInd) - FillBuff(); - - char c = buffer[bufpos]; - - UpdateLineColumn(c); - return c; - } - - /** - * @deprecated - * @see #getEndColumn - */ - - public int getColumn() { - return bufcolumn[bufpos]; - } - - /** - * @deprecated - * @see #getEndLine - */ - - public int getLine() { - return bufline[bufpos]; - } - - /** Get token end column number. 
*/ - public int getEndColumn() { - return bufcolumn[bufpos]; - } - - /** Get token end line number. */ - public int getEndLine() { - return bufline[bufpos]; - } - - /** Get token beginning column number. */ - public int getBeginColumn() { - return bufcolumn[tokenBegin]; - } - - /** Get token beginning line number. */ - public int getBeginLine() { - return bufline[tokenBegin]; - } - -/** Backup a number of characters. */ - public void backup(int amount) { - - inBuf += amount; - if ((bufpos -= amount) < 0) - bufpos += bufsize; - } - - /** Constructor. */ - public SimpleCharStream(java.io.Reader dstream, int startline, - int startcolumn, int buffersize) - { - inputStream = dstream; - line = startline; - column = startcolumn - 1; - - available = bufsize = buffersize; - buffer = new char[buffersize]; - bufline = new int[buffersize]; - bufcolumn = new int[buffersize]; - } - - /** Constructor. */ - public SimpleCharStream(java.io.Reader dstream, int startline, - int startcolumn) - { - this(dstream, startline, startcolumn, 4096); - } - - /** Constructor. */ - public SimpleCharStream(java.io.Reader dstream) - { - this(dstream, 1, 1, 4096); - } - - /** Reinitialise. */ - public void ReInit(java.io.Reader dstream, int startline, - int startcolumn, int buffersize) - { - inputStream = dstream; - line = startline; - column = startcolumn - 1; - - if (buffer == null || buffersize != buffer.length) - { - available = bufsize = buffersize; - buffer = new char[buffersize]; - bufline = new int[buffersize]; - bufcolumn = new int[buffersize]; - } - prevCharIsLF = prevCharIsCR = false; - tokenBegin = inBuf = maxNextCharInd = 0; - bufpos = -1; - } - - /** Reinitialise. */ - public void ReInit(java.io.Reader dstream, int startline, - int startcolumn) - { - ReInit(dstream, startline, startcolumn, 4096); - } - - /** Reinitialise. */ - public void ReInit(java.io.Reader dstream) - { - ReInit(dstream, 1, 1, 4096); - } - /** Constructor. 
*/ - public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline, - int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException - { - this(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize); - } - - /** Constructor. */ - public SimpleCharStream(java.io.InputStream dstream, int startline, - int startcolumn, int buffersize) - { - this(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize); - } - - /** Constructor. */ - public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline, - int startcolumn) throws java.io.UnsupportedEncodingException - { - this(dstream, encoding, startline, startcolumn, 4096); - } - - /** Constructor. */ - public SimpleCharStream(java.io.InputStream dstream, int startline, - int startcolumn) - { - this(dstream, startline, startcolumn, 4096); - } - - /** Constructor. */ - public SimpleCharStream(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException - { - this(dstream, encoding, 1, 1, 4096); - } - - /** Constructor. */ - public SimpleCharStream(java.io.InputStream dstream) - { - this(dstream, 1, 1, 4096); - } - - /** Reinitialise. */ - public void ReInit(java.io.InputStream dstream, String encoding, int startline, - int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException - { - ReInit(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize); - } - - /** Reinitialise. */ - public void ReInit(java.io.InputStream dstream, int startline, - int startcolumn, int buffersize) - { - ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize); - } - - /** Reinitialise. 
*/ - public void ReInit(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException - { - ReInit(dstream, encoding, 1, 1, 4096); - } - - /** Reinitialise. */ - public void ReInit(java.io.InputStream dstream) - { - ReInit(dstream, 1, 1, 4096); - } - /** Reinitialise. */ - public void ReInit(java.io.InputStream dstream, String encoding, int startline, - int startcolumn) throws java.io.UnsupportedEncodingException - { - ReInit(dstream, encoding, startline, startcolumn, 4096); - } - /** Reinitialise. */ - public void ReInit(java.io.InputStream dstream, int startline, - int startcolumn) - { - ReInit(dstream, startline, startcolumn, 4096); - } - /** Get token literal value. */ - public String GetImage() - { - if (bufpos >= tokenBegin) - return new String(buffer, tokenBegin, bufpos - tokenBegin + 1); - else - return new String(buffer, tokenBegin, bufsize - tokenBegin) + - new String(buffer, 0, bufpos + 1); - } - - /** Get the suffix. */ - public char[] GetSuffix(int len) - { - char[] ret = new char[len]; - - if ((bufpos + 1) >= len) - System.arraycopy(buffer, bufpos - len + 1, ret, 0, len); - else - { - System.arraycopy(buffer, bufsize - (len - bufpos - 1), ret, 0, - len - bufpos - 1); - System.arraycopy(buffer, 0, ret, len - bufpos - 1, bufpos + 1); - } - - return ret; - } - - /** Reset buffer when finished. */ - public void Done() - { - buffer = null; - bufline = null; - bufcolumn = null; - } - - /** - * Method to adjust line and column numbers for the start of a token. 
- */ - public void adjustBeginLineColumn(int newLine, int newCol) - { - int start = tokenBegin; - int len; - - if (bufpos >= tokenBegin) - { - len = bufpos - tokenBegin + inBuf + 1; - } - else - { - len = bufsize - tokenBegin + bufpos + 1 + inBuf; - } - - int i = 0, j = 0, k = 0; - int nextColDiff = 0, columnDiff = 0; - - while (i < len && - bufline[j = start % bufsize] == bufline[k = ++start % bufsize]) - { - bufline[j] = newLine; - nextColDiff = columnDiff + bufcolumn[k] - bufcolumn[j]; - bufcolumn[j] = newCol + columnDiff; - columnDiff = nextColDiff; - i++; - } - - if (i < len) - { - bufline[j] = newLine++; - bufcolumn[j] = newCol + columnDiff; - - while (i++ < len) - { - if (bufline[j = start % bufsize] != bufline[++start % bufsize]) - bufline[j] = newLine++; - else - bufline[j] = newLine; - } - } - - line = bufline[j]; - column = bufcolumn[j]; - } - -} -/* JavaCC - OriginalChecksum=7c2e625567f11c3058995b779d0149ad (do not edit this line) */ Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Token.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Token.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Token.java (working copy) @@ -121,4 +121,4 @@ } } -/* JavaCC - OriginalChecksum=e49c2a0c10d50ff2ebd0639552330ce7 (do not edit this line) */ +/* JavaCC - OriginalChecksum=24643dc85fd6daeec42ceba20b46ee61 (do not edit this line) */ Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/TokenMgrError.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/TokenMgrError.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/TokenMgrError.java (working copy) @@ -138,4 +138,4 @@ this(LexicalError(EOFSeen, 
lexState, errorLine, errorColumn, errorAfter, curChar), reason); } } -/* JavaCC - OriginalChecksum=3aee554f696e5d7a18b1ad330c1de53f (do not edit this line) */ +/* JavaCC - OriginalChecksum=538f0da130356fcc0bc7db621ab0389d (do not edit this line) */ Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java (working copy) @@ -18,12 +18,14 @@ */ import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.util.IOUtils; import java.io.BufferedReader; import java.io.File; import java.io.FileFilter; -import java.io.FileReader; +import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStreamReader; import java.text.DateFormat; import java.text.ParsePosition; import java.text.SimpleDateFormat; @@ -161,7 +163,7 @@ dfi = new DateFormatInfo(); dfi.pos = new ParsePosition(0); // date format: 30-MAR-1987 14:22:36.87 - dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS", Locale.US); + dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS", Locale.ROOT); dfi.df.setLenient(true); dateFormat.set(dfi); } @@ -198,7 +200,7 @@ name = f.getCanonicalPath()+"_"+iteration; } - BufferedReader reader = new BufferedReader(new FileReader(f)); + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), IOUtils.CHARSET_UTF_8)); String line = null; //First line is the date, 3rd is the title, rest is body String dateStr = reader.readLine(); Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (revision 1359190) +++ 
lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (working copy) @@ -29,6 +29,7 @@ import java.util.Map; import java.util.Properties; import java.util.Random; +import java.util.TimeZone; import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.benchmark.byTask.utils.Config; @@ -182,8 +183,8 @@ private boolean storeBytes = false; private static class DateUtil { - public SimpleDateFormat parser = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss", Locale.US); - public Calendar cal = Calendar.getInstance(); + public SimpleDateFormat parser = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss", Locale.ROOT); + public Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT); public ParsePosition pos = new ParsePosition(0); public DateUtil() { parser.setLenient(true); Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java (working copy) @@ -25,6 +25,7 @@ import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; import java.util.HashMap; +import java.util.Locale; import java.util.Map; import org.apache.lucene.benchmark.byTask.utils.Config; @@ -146,7 +147,7 @@ case BODY: body = contents.toString(); //workaround that startswith doesn't have an ignore case option, get at least 20 chars. 
- String startsWith = body.substring(0, Math.min(10, contents.length())).toLowerCase(); + String startsWith = body.substring(0, Math.min(10, contents.length())).toLowerCase(Locale.ROOT); if (startsWith.startsWith("#redirect")) { body = null; } Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java (working copy) @@ -5,6 +5,7 @@ import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.Query; import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.Version; import java.io.*; @@ -59,13 +60,14 @@ { File file = new File(fileName); Reader reader = null; + // note: we use a decoding reader, so if your queries are screwed up you know if (file.exists()) { - reader = new FileReader(file); + reader = IOUtils.getDecodingReader(file, IOUtils.CHARSET_UTF_8); } else { //see if we can find it as a resource InputStream asStream = FileBasedQueryMaker.class.getClassLoader().getResourceAsStream(fileName); if (asStream != null) { - reader = new InputStreamReader(asStream); + reader = IOUtils.getDecodingReader(asStream, IOUtils.CHARSET_UTF_8); } } if (reader != null) { Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java (working copy) @@ -35,7 +35,7 @@ } // TODO: we could take param to specify locale... 
- private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ENGLISH, + private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ROOT, RuleBasedNumberFormat.SPELLOUT); @Override public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException { Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java (working copy) @@ -37,7 +37,7 @@ protected QueryParser parser; // TODO: we could take param to specify locale... - private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ENGLISH, + private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ROOT, RuleBasedNumberFormat.SPELLOUT); public Query makeQuery(int size) throws Exception { Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java (working copy) @@ -19,8 +19,9 @@ import java.io.BufferedReader; import java.io.File; -import java.io.FileReader; +import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStreamReader; import java.text.DateFormat; import java.text.ParsePosition; import java.text.SimpleDateFormat; @@ -29,6 +30,7 @@ import java.util.Locale; import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.util.IOUtils; /** * A {@link ContentSource} reading from the Reuters collection. 
@@ -74,7 +76,7 @@ if (dfi == null) { dfi = new DateFormatInfo(); // date format: 30-MAR-1987 14:22:36.87 - dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US); + dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.ROOT); dfi.df.setLenient(true); dfi.pos = new ParsePosition(0); dateFormat.set(dfi); @@ -112,7 +114,7 @@ name = f.getCanonicalPath() + "_" + iteration; } - BufferedReader reader = new BufferedReader(new FileReader(f)); + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), IOUtils.CHARSET_UTF_8)); try { // First line is the date, 3rd is the title, rest is body String dateStr = reader.readLine(); Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (working copy) @@ -108,7 +108,7 @@ dfi = new DateFormatInfo(); dfi.dfs = new SimpleDateFormat[DATE_FORMATS.length]; for (int i = 0; i < dfi.dfs.length; i++) { - dfi.dfs[i] = new SimpleDateFormat(DATE_FORMATS[i], Locale.US); + dfi.dfs[i] = new SimpleDateFormat(DATE_FORMATS[i], Locale.ROOT); dfi.dfs[i].setLenient(true); } dfi.pos = new ParsePosition(0); Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java (working copy) @@ -47,7 +47,7 @@ static final Map pathName2Type = new HashMap(); static { for (ParsePathType ppt : ParsePathType.values()) { - pathName2Type.put(ppt.name().toUpperCase(Locale.ENGLISH),ppt); + 
pathName2Type.put(ppt.name().toUpperCase(Locale.ROOT),ppt); } } @@ -60,7 +60,7 @@ public static ParsePathType pathType(File f) { int pathLength = 0; while (f != null && ++pathLength < MAX_PATH_LENGTH) { - ParsePathType ppt = pathName2Type.get(f.getName().toUpperCase(Locale.ENGLISH)); + ParsePathType ppt = pathName2Type.get(f.getName().toUpperCase(Locale.ROOT)); if (ppt!=null) { return ppt; } Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (working copy) @@ -18,6 +18,7 @@ */ import java.text.NumberFormat; +import java.util.Locale; import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.feeds.DocMaker; @@ -61,7 +62,7 @@ @Override protected String getLogMessage(int recsCount) { - return String.format("added %9d docs",recsCount); + return String.format(Locale.ROOT, "added %9d docs",recsCount); } @Override Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (working copy) @@ -40,6 +40,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintStream; +import java.nio.charset.Charset; /** * Create an index.
@@ -182,7 +183,7 @@ iwc.setInfoStream(System.err); } else { File f = new File(infoStreamVal).getAbsoluteFile(); - iwc.setInfoStream(new PrintStream(new BufferedOutputStream(new FileOutputStream(f)))); + iwc.setInfoStream(new PrintStream(new BufferedOutputStream(new FileOutputStream(f)), false, Charset.defaultCharset().name())); } } IndexWriter writer = new IndexWriter(runData.getDirectory(), iwc); Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java (working copy) @@ -17,6 +17,8 @@ * limitations under the License. */ +import java.util.Locale; + import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.stats.Points; import org.apache.lucene.benchmark.byTask.stats.TaskStats; @@ -266,7 +268,7 @@ public void tearDown() throws Exception { if (++logStepCount % logStep == 0) { double time = (System.currentTimeMillis() - runData.getStartTimeMillis()) / 1000.0; - System.out.println(String.format("%7.2f",time) + " sec --> " + System.out.println(String.format(Locale.ROOT, "%7.2f",time) + " sec --> " + Thread.currentThread().getName() + " " + getLogMessage(logStepCount)); } } Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java (working copy) @@ -77,7 +77,7 @@ } else { throw new RuntimeException("You must specify the sort type ie page:int,subject:string"); } - sortField0 = new SortField(fieldName, 
SortField.Type.valueOf(typeString.toUpperCase(Locale.ENGLISH))); + sortField0 = new SortField(fieldName, SortField.Type.valueOf(typeString.toUpperCase(Locale.ROOT))); } sortFields[upto++] = sortField0; } Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java (working copy) @@ -19,6 +19,7 @@ import java.util.ArrayList; import java.util.List; +import java.util.Locale; import java.text.NumberFormat; import org.apache.lucene.benchmark.byTask.PerfRunData; @@ -428,7 +429,7 @@ sb.append(padd); sb.append(!letChildReport ? ">" : (parallel ? "]" : "}")); if (fixedTime) { - sb.append(" " + NumberFormat.getNumberInstance().format(runTimeSec) + "s"); + sb.append(" " + NumberFormat.getNumberInstance(Locale.ROOT).format(runTimeSec) + "s"); } else if (repetitions>1) { sb.append(" * " + repetitions); } else if (repetitions==REPEAT_EXHAUST) { @@ -487,7 +488,7 @@ if (rate>0) { seqName += "_" + rate + (perMin?"/min":"/sec"); } - if (parallel && seqName.toLowerCase().indexOf("par")<0) { + if (parallel && seqName.toLowerCase(Locale.ROOT).indexOf("par")<0) { seqName += "_Par"; } } Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java (working copy) @@ -22,6 +22,7 @@ import java.lang.reflect.Constructor; import java.util.ArrayList; import java.util.Arrays; +import java.util.Locale; import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.tasks.PerfTask; @@ -159,7 
+160,7 @@ } else { stok.nextToken(); if (stok.ttype!=StreamTokenizer.TT_WORD) throw new Exception("expected rate unit: 'min' or 'sec' - "+stok.toString()); - String unit = stok.sval.toLowerCase(); + String unit = stok.sval.toLowerCase(Locale.ROOT); if ("min".equals(unit)) { ((TaskSequence)prevTask).setRate((int)stok.nval,true); // set rate per min } else if ("sec".equals(unit)) { Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java (working copy) @@ -18,6 +18,7 @@ */ import java.text.NumberFormat; +import java.util.Locale; /** * Formatting utilities (for reports). @@ -25,9 +26,9 @@ public class Format { private static NumberFormat numFormat [] = { - NumberFormat.getInstance(), - NumberFormat.getInstance(), - NumberFormat.getInstance(), + NumberFormat.getInstance(Locale.ROOT), + NumberFormat.getInstance(Locale.ROOT), + NumberFormat.getInstance(Locale.ROOT), }; private static final String padd = " "; Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StreamUtils.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StreamUtils.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StreamUtils.java (working copy) @@ -99,7 +99,7 @@ String fileName = file.getName(); int idx = fileName.lastIndexOf('.'); if (idx != -1) { - type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ENGLISH)); + type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ROOT)); } return type==null ? 
Type.PLAIN : type; } Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java (working copy) @@ -19,6 +19,7 @@ import java.io.PrintWriter; import java.text.NumberFormat; import java.util.ArrayList; +import java.util.Locale; /** * Results of quality benchmark run for a single query or for a set of queries. @@ -141,7 +142,7 @@ logger.println(title); } prefix = prefix==null ? "" : prefix; - NumberFormat nf = NumberFormat.getInstance(); + NumberFormat nf = NumberFormat.getInstance(Locale.ROOT); nf.setMaximumFractionDigits(3); nf.setMinimumFractionDigits(3); nf.setGroupingUsed(true); Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java (working copy) @@ -24,11 +24,13 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.IOUtils; import java.io.BufferedReader; import java.io.File; -import java.io.FileReader; +import java.io.OutputStreamWriter; import java.io.PrintWriter; +import java.nio.charset.Charset; import java.util.HashSet; import java.util.Set; @@ -51,7 +53,7 @@ File topicsFile = new File(args[0]); File qrelsFile = new File(args[1]); - SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2]), "lucene"); + SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2], "UTF-8"), "lucene"); FSDirectory dir = FSDirectory.open(new File(args[3])); 
String fieldSpec = args.length == 5 ? args[4] : "T"; // default to Title-only if not specified. IndexReader reader = DirectoryReader.open(dir); @@ -60,14 +62,14 @@ int maxResults = 1000; String docNameField = "docname"; - PrintWriter logger = new PrintWriter(System.out, true); + PrintWriter logger = new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()), true); // use trec utilities to read trec topics into quality queries TrecTopicsReader qReader = new TrecTopicsReader(); - QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile))); + QualityQuery qqs[] = qReader.readQueries(new BufferedReader(IOUtils.getDecodingReader(topicsFile, IOUtils.CHARSET_UTF_8))); // prepare judge, with trec utilities that read from a QRels file - Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile))); + Judge judge = new TrecJudge(new BufferedReader(IOUtils.getDecodingReader(qrelsFile, IOUtils.CHARSET_UTF_8))); // validate topics & judgments match each other judge.validateData(qqs, logger); Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java (working copy) @@ -19,6 +19,7 @@ import java.io.IOException; import java.io.PrintWriter; import java.text.NumberFormat; +import java.util.Locale; import org.apache.lucene.benchmark.quality.QualityQuery; import org.apache.lucene.search.ScoreDoc; @@ -45,7 +46,7 @@ public SubmissionReport (PrintWriter logger, String name) { this.logger = logger; this.name = name; - nf = NumberFormat.getInstance(); + nf = NumberFormat.getInstance(Locale.ROOT); nf.setMaximumFractionDigits(4); nf.setMinimumFractionDigits(4); } Index: 
lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java (working copy) @@ -19,13 +19,19 @@ import java.io.BufferedReader; import java.io.File; import java.io.FileFilter; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.lucene.util.IOUtils; + /** * Split the Reuters SGML documents into Simple Text files containing: Title, Date, Dateline, Body */ @@ -73,7 +79,7 @@ */ protected void extractFile(File sgmFile) { try { - BufferedReader reader = new BufferedReader(new FileReader(sgmFile)); + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(sgmFile), IOUtils.CHARSET_UTF_8)); StringBuilder buffer = new StringBuilder(1024); StringBuilder outBuffer = new StringBuilder(1024); @@ -107,7 +113,7 @@ File outFile = new File(outputDir, sgmFile.getName() + "-" + (docNumber++) + ".txt"); // System.out.println("Writing " + outFile); - FileWriter writer = new FileWriter(outFile); + OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(outFile), IOUtils.CHARSET_UTF_8); writer.write(out); writer.close(); outBuffer.setLength(0); Index: lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java =================================================================== --- lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java (revision 1359190) +++ lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java (working copy) @@ -18,8 +18,10 @@ */ import 
java.io.File; -import java.io.FileWriter; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; import java.util.Properties; import org.apache.lucene.benchmark.byTask.feeds.ContentSource; @@ -28,6 +30,7 @@ import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException; import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.document.Document; +import org.apache.lucene.util.IOUtils; /** * Extract the downloaded Wikipedia dump into separate files for indexing. @@ -83,7 +86,7 @@ contents.append("\n"); try { - FileWriter writer = new FileWriter(f); + Writer writer = new OutputStreamWriter(new FileOutputStream(f), IOUtils.CHARSET_UTF_8); writer.write(contents.toString()); writer.close(); } catch (IOException ioe) { Index: lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java =================================================================== --- lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java (revision 1359190) +++ lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java (working copy) @@ -166,7 +166,7 @@ // DocMaker did not close its ContentSource if resetInputs was called twice, // leading to a file handle leak. 
File f = new File(getWorkDir(), "docMakerLeak.txt"); - PrintStream ps = new PrintStream(f); + PrintStream ps = new PrintStream(f, "UTF-8"); ps.println("one title\t" + System.currentTimeMillis() + "\tsome content"); ps.close(); Index: lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTaskTest.java =================================================================== --- lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTaskTest.java (revision 1359190) +++ lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTaskTest.java (working copy) @@ -20,6 +20,7 @@ import java.io.ByteArrayOutputStream; import java.io.File; import java.io.PrintStream; +import java.nio.charset.Charset; import java.util.Properties; import org.apache.lucene.benchmark.BenchmarkTestCase; @@ -50,7 +51,7 @@ PrintStream curOut = System.out; ByteArrayOutputStream baos = new ByteArrayOutputStream(); - System.setOut(new PrintStream(baos)); + System.setOut(new PrintStream(baos, false, Charset.defaultCharset().name())); try { PerfRunData runData = createPerfRunData("SystemOut"); CreateIndexTask cit = new CreateIndexTask(runData); @@ -63,7 +64,7 @@ PrintStream curErr = System.err; baos.reset(); - System.setErr(new PrintStream(baos)); + System.setErr(new PrintStream(baos, false, Charset.defaultCharset().name())); try { PerfRunData runData = createPerfRunData("SystemErr"); CreateIndexTask cit = new CreateIndexTask(runData); Index: lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java =================================================================== --- lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java (revision 1359190) +++ lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java (working copy) @@ -31,6 +31,7 @@ import org.apache.commons.compress.compressors.CompressorStreamFactory; import 
org.apache.lucene.benchmark.BenchmarkTestCase; import org.apache.lucene.benchmark.byTask.utils.StreamUtils; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util._TestUtil; import org.junit.After; import org.junit.Before; @@ -88,7 +89,7 @@ private File rawTextFile(String ext) throws Exception { File f = new File(testDir,"testfile." + ext); - BufferedWriter w = new BufferedWriter(new FileWriter(f)); + BufferedWriter w = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), IOUtils.CHARSET_UTF_8)); w.write(TEXT); w.newLine(); w.close(); @@ -117,7 +118,7 @@ } private void writeText(OutputStream os) throws IOException { - BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os)); + BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os, IOUtils.CHARSET_UTF_8)); w.write(TEXT); w.newLine(); w.close(); @@ -125,7 +126,7 @@ private void assertReadText(File f) throws Exception { InputStream ir = StreamUtils.inputStream(f); - InputStreamReader in = new InputStreamReader(ir); + InputStreamReader in = new InputStreamReader(ir, IOUtils.CHARSET_UTF_8); BufferedReader r = new BufferedReader(in); String line = r.readLine(); assertEquals("Wrong text found in "+f.getName(), TEXT, line); Index: lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java =================================================================== --- lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java (revision 1359190) +++ lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java (working copy) @@ -31,7 +31,9 @@ import java.io.File; import java.io.InputStream; import java.io.InputStreamReader; +import java.io.OutputStreamWriter; import java.io.PrintWriter; +import java.nio.charset.Charset; /** * Test that quality run does its job. @@ -55,7 +57,7 @@ int maxResults = 1000; String docNameField = "doctitle"; // orig docID is in the linedoc format title - PrintWriter logger = VERBOSE ? 
new PrintWriter(System.out,true) : null; + PrintWriter logger = VERBOSE ? new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()),true) : null; // prepare topics InputStream topics = getClass().getResourceAsStream("trecTopics.txt"); Index: lucene/build.xml =================================================================== --- lucene/build.xml (revision 1359190) +++ lucene/build.xml (working copy) @@ -169,11 +169,19 @@ - - + + + + + + + + + + Index: lucene/build.xml =================================================================== --- lucene/build.xml (revision 1359190) +++ lucene/build.xml (working copy) Property changes on: lucene/build.xml ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/lucene4199/lucene/build.xml:r1358548-1359191 Index: lucene/core =================================================================== --- lucene/core (revision 1359190) +++ lucene/core (working copy) Property changes on: lucene/core ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/lucene4199/lucene/core:r1358548-1359191 Index: lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (revision 1359190) +++ lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (working copy) @@ -20,8 +20,10 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.PrintStream; +import java.io.UnsupportedEncodingException; import java.util.Comparator; import java.util.Iterator; +import java.util.Locale; import java.util.TreeMap; import org.apache.lucene.index.DocsAndPositionsEnum; @@ -345,7 +347,12 @@ @Override public String toString() { final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); - final PrintStream out = new 
PrintStream(bos); + PrintStream out; + try { + out = new PrintStream(bos, false, "UTF-8"); + } catch (UnsupportedEncodingException bogus) { + throw new RuntimeException(bogus); + } out.println(" index FST:"); out.println(" " + indexNodeCount + " nodes"); @@ -353,7 +360,7 @@ out.println(" " + indexNumBytes + " bytes"); out.println(" terms:"); out.println(" " + totalTermCount + " terms"); - out.println(" " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format("%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : "")); + out.println(" " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : "")); out.println(" blocks:"); out.println(" " + totalBlockCount + " blocks"); out.println(" " + termsOnlyBlockCount + " terms-only blocks"); @@ -362,9 +369,9 @@ out.println(" " + floorBlockCount + " floor blocks"); out.println(" " + (totalBlockCount-floorSubBlockCount) + " non-floor blocks"); out.println(" " + floorSubBlockCount + " floor sub-blocks"); - out.println(" " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format("%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : "")); - out.println(" " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? " (" + String.format("%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : "")); - out.println(" " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format("%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : "")); + out.println(" " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : "")); + out.println(" " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? 
" (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : "")); + out.println(" " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : "")); if (totalBlockCount != 0) { out.println(" by prefix length:"); int total = 0; @@ -372,13 +379,17 @@ final int blockCount = blockCountByPrefixLen[prefix]; total += blockCount; if (blockCount != 0) { - out.println(" " + String.format("%2d", prefix) + ": " + blockCount); + out.println(" " + String.format(Locale.ROOT, "%2d", prefix) + ": " + blockCount); } } assert totalBlockCount == total; } - return bos.toString(); + try { + return bos.toString("UTF-8"); + } catch (UnsupportedEncodingException bogus) { + throw new RuntimeException(bogus); + } } } Index: lucene/core/src/java/org/apache/lucene/document/DateTools.java =================================================================== --- lucene/core/src/java/org/apache/lucene/document/DateTools.java (revision 1359190) +++ lucene/core/src/java/org/apache/lucene/document/DateTools.java (working copy) @@ -53,7 +53,7 @@ private static final ThreadLocal TL_CAL = new ThreadLocal() { @Override protected Calendar initialValue() { - return Calendar.getInstance(GMT, Locale.US); + return Calendar.getInstance(GMT, Locale.ROOT); } }; @@ -194,7 +194,7 @@ this.formatLen = formatLen; // formatLen 10's place: 11111111 // formatLen 1's place: 12345678901234567 - this.format = new SimpleDateFormat("yyyyMMddHHmmssSSS".substring(0,formatLen),Locale.US); + this.format = new SimpleDateFormat("yyyyMMddHHmmssSSS".substring(0,formatLen),Locale.ROOT); this.format.setTimeZone(GMT); } @@ -202,7 +202,7 @@ * in lowercase (for backwards compatibility) */ @Override public String toString() { - return super.toString().toLowerCase(Locale.ENGLISH); + return super.toString().toLowerCase(Locale.ROOT); } } Index: 
lucene/core/src/java/org/apache/lucene/index/CheckIndex.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (revision 1359190) +++ lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (working copy) @@ -25,6 +25,7 @@ import java.util.Comparator; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import org.apache.lucene.codecs.BlockTreeTermsReader; @@ -340,7 +341,7 @@ * you only call this when the index is not opened by any * writer. */ public Status checkIndex(List onlySegments) throws IOException { - NumberFormat nf = NumberFormat.getInstance(); + NumberFormat nf = NumberFormat.getInstance(Locale.ROOT); SegmentInfos sis = new SegmentInfos(); Status result = new Status(); result.dir = dir; Index: lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (revision 1359190) +++ lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import java.text.NumberFormat; import java.util.HashSet; +import java.util.Locale; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; @@ -181,7 +182,7 @@ private int flushedDocCount; DocumentsWriterDeleteQueue deleteQueue; DeleteSlice deleteSlice; - private final NumberFormat nf = NumberFormat.getInstance(); + private final NumberFormat nf = NumberFormat.getInstance(Locale.ROOT); final Allocator byteBlockAllocator; Index: lucene/core/src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (revision 1359190) +++ lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -27,6 +27,7 @@ 
import java.util.Iterator; import java.util.LinkedList; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; @@ -3610,7 +3611,7 @@ // lost... if (infoStream.isEnabled("IW")) { - infoStream.message("IW", String.format("merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.)); + infoStream.message("IW", String.format(Locale.ROOT, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.)); } final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer(); Index: lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java (revision 1359190) +++ lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java (working copy) @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Locale; import java.util.Map; @@ -535,7 +536,7 @@ if (size >= maxMergeSize) { extra += " [skip: too large]"; } - message("seg=" + writer.get().segString(info) + " level=" + infoLevel.level + " size=" + String.format("%.3f MB", segBytes/1024/1024.) + extra); + message("seg=" + writer.get().segString(info) + " level=" + infoLevel.level + " size=" + String.format(Locale.ROOT, "%.3f MB", segBytes/1024/1024.) 
+ extra); } } Index: lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java (revision 1359190) +++ lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java (working copy) @@ -18,6 +18,7 @@ */ import java.io.IOException; +import java.util.Locale; import java.util.Map; import java.util.Collection; import java.util.Collections; @@ -289,7 +290,7 @@ } else if (segBytes < floorSegmentBytes) { extra += " [floored]"; } - message(" seg=" + writer.get().segString(info) + " size=" + String.format("%.3f", segBytes/1024/1024.) + " MB" + extra); + message(" seg=" + writer.get().segString(info) + " size=" + String.format(Locale.ROOT, "%.3f", segBytes/1024/1024.) + " MB" + extra); } minSegmentBytes = Math.min(segBytes, minSegmentBytes); @@ -388,7 +389,7 @@ final MergeScore score = score(candidate, hitTooLarge, mergingBytes); if (verbose()) { - message(" maybe=" + writer.get().segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format("%.3f MB", totAfterMergeBytes/1024./1024.)); + message(" maybe=" + writer.get().segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format(Locale.ROOT, "%.3f MB", totAfterMergeBytes/1024./1024.)); } // If we are already running a max sized merge @@ -413,7 +414,7 @@ } if (verbose()) { - message(" add merge=" + writer.get().segString(merge.segments) + " size=" + String.format("%.3f MB", bestMergeBytes/1024./1024.) + " score=" + String.format("%.3f", bestScore.getScore()) + " " + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : "")); + message(" add merge=" + writer.get().segString(merge.segments) + " size=" + String.format(Locale.ROOT, "%.3f MB", bestMergeBytes/1024./1024.) 
+ " score=" + String.format(Locale.ROOT, "%.3f", bestScore.getScore()) + " " + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : "")); } } else { return spec; @@ -475,7 +476,7 @@ @Override public String getExplanation() { - return "skew=" + String.format("%.3f", skew) + " nonDelRatio=" + String.format("%.3f", nonDelRatio); + return "skew=" + String.format(Locale.ROOT, "%.3f", skew) + " nonDelRatio=" + String.format(Locale.ROOT, "%.3f", nonDelRatio); } }; } Index: lucene/core/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java (revision 1359190) +++ lucene/core/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java (working copy) @@ -17,6 +17,8 @@ * limitations under the License. */ +import java.util.Locale; + import org.apache.lucene.search.Explanation; /** @@ -92,6 +94,6 @@ @Override public String getName() { - return String.format("Dirichlet(%f)", getMu()); + return String.format(Locale.ROOT, "Dirichlet(%f)", getMu()); } } Index: lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java (revision 1359190) +++ lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java (working copy) @@ -17,6 +17,8 @@ * limitations under the License. 
*/ +import java.util.Locale; + import org.apache.lucene.search.Explanation; /** @@ -72,6 +74,6 @@ @Override public String getName() { - return String.format("Jelinek-Mercer(%f)", getLambda()); + return String.format(Locale.ROOT, "Jelinek-Mercer(%f)", getLambda()); } } Index: lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java (revision 1359190) +++ lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java (working copy) @@ -17,6 +17,8 @@ * limitations under the License. */ +import java.util.Locale; + import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.TermStatistics; @@ -91,9 +93,9 @@ public String toString() { String coll = collectionModel.getName(); if (coll != null) { - return String.format("LM %s - %s", getName(), coll); + return String.format(Locale.ROOT, "LM %s - %s", getName(), coll); } else { - return String.format("LM %s", getName()); + return String.format(Locale.ROOT, "LM %s", getName()); } } Index: lucene/core/src/java/org/apache/lucene/util/NamedThreadFactory.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/NamedThreadFactory.java (revision 1359190) +++ lucene/core/src/java/org/apache/lucene/util/NamedThreadFactory.java (working copy) @@ -17,6 +17,7 @@ * limitations under the License. */ +import java.util.Locale; import java.util.concurrent.Executors; import java.util.concurrent.ThreadFactory; import java.util.concurrent.atomic.AtomicInteger; @@ -43,7 +44,7 @@ final SecurityManager s = System.getSecurityManager(); group = (s != null) ? 
s.getThreadGroup() : Thread.currentThread() .getThreadGroup(); - this.threadNamePrefix = String.format(NAME_PATTERN, + this.threadNamePrefix = String.format(Locale.ROOT, NAME_PATTERN, checkPrefix(threadNamePrefix), threadPoolNumber.getAndIncrement()); } @@ -57,7 +58,7 @@ * @see java.util.concurrent.ThreadFactory#newThread(java.lang.Runnable) */ public Thread newThread(Runnable r) { - final Thread t = new Thread(group, r, String.format("%s-%d", + final Thread t = new Thread(group, r, String.format(Locale.ROOT, "%s-%d", this.threadNamePrefix, threadNumber.getAndIncrement()), 0); t.setDaemon(false); t.setPriority(Thread.NORM_PRIORITY); Index: lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java (revision 1359190) +++ lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java (working copy) @@ -559,7 +559,7 @@ */ public static String humanReadableUnits(long bytes) { return humanReadableUnits(bytes, - new DecimalFormat("0.#", DecimalFormatSymbols.getInstance(Locale.ENGLISH))); + new DecimalFormat("0.#", DecimalFormatSymbols.getInstance(Locale.ROOT))); } /** Index: lucene/core/src/java/org/apache/lucene/util/Version.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/Version.java (revision 1359190) +++ lucene/core/src/java/org/apache/lucene/util/Version.java (working copy) @@ -73,7 +73,7 @@ } public static Version parseLeniently(String version) { - String parsedMatchVersion = version.toUpperCase(Locale.ENGLISH); + String parsedMatchVersion = version.toUpperCase(Locale.ROOT); return Version.valueOf(parsedMatchVersion.replaceFirst("^(\\d)\\.(\\d)$", "LUCENE_$1$2")); } } \ No newline at end of file Index: lucene/core/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java 
=================================================================== --- lucene/core/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java (revision 1359190) +++ lucene/core/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java (working copy) @@ -159,7 +159,7 @@ public void testAppendableInterface() { CharTermAttributeImpl t = new CharTermAttributeImpl(); - Formatter formatter = new Formatter(t, Locale.US); + Formatter formatter = new Formatter(t, Locale.ROOT); formatter.format("%d", 1234); assertEquals("1234", t.toString()); formatter.format("%d", 5678); Index: lucene/core/src/test/org/apache/lucene/codecs/pulsing/Test10KPulsings.java =================================================================== --- lucene/core/src/test/org/apache/lucene/codecs/pulsing/Test10KPulsings.java (revision 1359190) +++ lucene/core/src/test/org/apache/lucene/codecs/pulsing/Test10KPulsings.java (working copy) @@ -71,7 +71,7 @@ Field field = newField("field", "", ft); document.add(field); - NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ENGLISH)); + NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ROOT)); for (int i = 0; i < 10050; i++) { field.setStringValue(df.format(i)); @@ -122,7 +122,7 @@ Field field = newField("field", "", ft); document.add(field); - NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ENGLISH)); + NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ROOT)); final int freq = freqCutoff + 1; Index: lucene/core/src/test/org/apache/lucene/document/TestBinaryDocument.java =================================================================== --- lucene/core/src/test/org/apache/lucene/document/TestBinaryDocument.java (revision 1359190) +++ lucene/core/src/test/org/apache/lucene/document/TestBinaryDocument.java (working copy) @@ -37,7 +37,7 @@ { FieldType ft = new FieldType(); ft.setStored(true); - 
IndexableField binaryFldStored = new StoredField("binaryStored", binaryValStored.getBytes()); + IndexableField binaryFldStored = new StoredField("binaryStored", binaryValStored.getBytes("UTF-8")); IndexableField stringFldStored = new Field("stringStored", binaryValStored, ft); Document doc = new Document(); @@ -62,7 +62,7 @@ /** fetch the binary stored field and compare it's content with the original one */ BytesRef bytes = docFromReader.getBinaryValue("binaryStored"); assertNotNull(bytes); - String binaryFldStoredTest = new String(bytes.bytes, bytes.offset, bytes.length); + String binaryFldStoredTest = new String(bytes.bytes, bytes.offset, bytes.length, "UTF-8"); assertTrue(binaryFldStoredTest.equals(binaryValStored)); /** fetch the string field and compare it's content with the original one */ @@ -75,7 +75,7 @@ } public void testCompressionTools() throws Exception { - IndexableField binaryFldCompressed = new StoredField("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes())); + IndexableField binaryFldCompressed = new StoredField("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes("UTF-8"))); IndexableField stringFldCompressed = new StoredField("stringCompressed", CompressionTools.compressString(binaryValCompressed)); Document doc = new Document(); @@ -94,7 +94,7 @@ assertTrue(docFromReader != null); /** fetch the binary compressed field and compare it's content with the original one */ - String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed"))); + String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed")), "UTF-8"); assertTrue(binaryFldCompressedTest.equals(binaryValCompressed)); assertTrue(CompressionTools.decompressString(docFromReader.getBinaryValue("stringCompressed")).equals(binaryValCompressed)); Index: lucene/core/src/test/org/apache/lucene/document/TestDateTools.java 
=================================================================== --- lucene/core/src/test/org/apache/lucene/document/TestDateTools.java (revision 1359190) +++ lucene/core/src/test/org/apache/lucene/document/TestDateTools.java (working copy) @@ -61,12 +61,12 @@ public void testStringtoTime() throws ParseException { long time = DateTools.stringToTime("197001010000"); - Calendar cal = new GregorianCalendar(); + // we use default locale since LuceneTestCase randomizes it + Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"), Locale.getDefault()); cal.clear(); cal.set(1970, 0, 1, // year=1970, month=january, day=1 0, 0, 0); // hour, minute, second cal.set(Calendar.MILLISECOND, 0); - cal.setTimeZone(TimeZone.getTimeZone("GMT")); assertEquals(cal.getTime().getTime(), time); cal.set(1980, 1, 2, // year=1980, month=february, day=2 11, 5, 0); // hour, minute, second @@ -76,9 +76,9 @@ } public void testDateAndTimetoString() throws ParseException { - Calendar cal = new GregorianCalendar(); + // we use default locale since LuceneTestCase randomizes it + Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"), Locale.getDefault()); cal.clear(); - cal.setTimeZone(TimeZone.getTimeZone("GMT")); cal.set(2004, 1, 3, // year=2004, month=february(!), day=3 22, 8, 56); // hour, minute, second cal.set(Calendar.MILLISECOND, 333); @@ -141,9 +141,9 @@ } public void testRound() { - Calendar cal = new GregorianCalendar(); + // we use default locale since LuceneTestCase randomizes it + Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"), Locale.getDefault()); cal.clear(); - cal.setTimeZone(TimeZone.getTimeZone("GMT")); cal.set(2004, 1, 3, // year=2004, month=february(!), day=3 22, 8, 56); // hour, minute, second cal.set(Calendar.MILLISECOND, 333); @@ -180,7 +180,7 @@ } private String isoFormat(Date date) { - SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS", Locale.US); + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd 
HH:mm:ss:SSS", Locale.ROOT); sdf.setTimeZone(TimeZone.getTimeZone("GMT")); return sdf.format(date); } Index: lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 1359190) +++ lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (working copy) @@ -219,10 +219,10 @@ ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); CheckIndex checker = new CheckIndex(dir); - checker.setInfoStream(new PrintStream(bos)); + checker.setInfoStream(new PrintStream(bos, false, "UTF-8")); CheckIndex.Status indexStatus = checker.checkIndex(); assertFalse(indexStatus.clean); - assertTrue(bos.toString().contains(IndexFormatTooOldException.class.getName())); + assertTrue(bos.toString("UTF-8").contains(IndexFormatTooOldException.class.getName())); dir.close(); _TestUtil.rmDir(oldIndxeDir); Index: lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java (revision 1359190) +++ lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java (working copy) @@ -52,12 +52,12 @@ ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); CheckIndex checker = new CheckIndex(dir); - checker.setInfoStream(new PrintStream(bos)); + checker.setInfoStream(new PrintStream(bos, false, "UTF-8")); if (VERBOSE) checker.setInfoStream(System.out); CheckIndex.Status indexStatus = checker.checkIndex(); if (indexStatus.clean == false) { System.out.println("CheckIndex failed"); - System.out.println(bos.toString()); + System.out.println(bos.toString("UTF-8")); fail(); } Index: lucene/core/src/test/org/apache/lucene/index/TestDoc.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestDoc.java 
(revision 1359190) +++ lucene/core/src/test/org/apache/lucene/index/TestDoc.java (working copy) @@ -17,11 +17,14 @@ * limitations under the License. */ import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.io.StringWriter; +import java.io.Writer; import java.util.Collection; import java.util.HashSet; import java.util.LinkedList; @@ -78,14 +81,14 @@ } private File createOutput(String name, String text) throws IOException { - FileWriter fw = null; + Writer fw = null; PrintWriter pw = null; try { File f = new File(workDir, name); if (f.exists()) f.delete(); - fw = new FileWriter(f); + fw = new OutputStreamWriter(new FileOutputStream(f), "UTF-8"); pw = new PrintWriter(fw); pw.println(text); return f; @@ -182,9 +185,11 @@ { File file = new File(workDir, fileName); Document doc = new Document(); - doc.add(new TextField("contents", new FileReader(file), Field.Store.NO)); + InputStreamReader is = new InputStreamReader(new FileInputStream(file), "UTF-8"); + doc.add(new TextField("contents", is, Field.Store.NO)); writer.addDocument(doc); writer.commit(); + is.close(); return writer.newestSegment(); } Index: lucene/core/src/test/org/apache/lucene/index/TestPayloads.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestPayloads.java (revision 1359190) +++ lucene/core/src/test/org/apache/lucene/index/TestPayloads.java (working copy) @@ -43,9 +43,8 @@ // Simple tests to test the Payload class public void testPayload() throws Exception { - byte[] testData = "This is a test!".getBytes(); - BytesRef payload = new BytesRef(testData); - assertEquals("Wrong payload length.", testData.length, payload.length); + BytesRef payload = new BytesRef("This is a test!"); + assertEquals("Wrong 
payload length.", "This is a test!".length(), payload.length); BytesRef clone = payload.clone(); assertEquals(payload.length, clone.length); @@ -73,7 +72,7 @@ // enabled in only some documents d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO)); // only add payload data for field f2 - analyzer.setPayloadData("f2", "somedata".getBytes(), 0, 1); + analyzer.setPayloadData("f2", "somedata".getBytes("UTF-8"), 0, 1); writer.addDocument(d); // flush writer.close(); @@ -96,8 +95,8 @@ d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO)); d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO)); // add payload data for field f2 and f3 - analyzer.setPayloadData("f2", "somedata".getBytes(), 0, 1); - analyzer.setPayloadData("f3", "somedata".getBytes(), 0, 3); + analyzer.setPayloadData("f2", "somedata".getBytes("UTF-8"), 0, 1); + analyzer.setPayloadData("f3", "somedata".getBytes("UTF-8"), 0, 3); writer.addDocument(d); // force merge Index: lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java (revision 1359190) +++ lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java (working copy) @@ -81,7 +81,7 @@ @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes())); + payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes("UTF-8"))); pos++; return true; } else { @@ -411,7 +411,7 @@ @Test public void testSpanPayloadCheck() throws Exception { SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five")); - BytesRef pay = new BytesRef(("pos: " + 5).getBytes()); + BytesRef pay = new BytesRef(("pos: " + 5).getBytes("UTF-8")); SpanQuery query = new SpanPayloadCheckQuery(term1, Collections.singletonList(pay.bytes)); checkHits(query, new 
int[] {1125, 1135, 1145, 1155, 1165, 1175, 1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345, 1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495, 1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665, 1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835, 1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985, 1995}); @@ -426,8 +426,8 @@ clauses[0] = term1; clauses[1] = term2; snq = new SpanNearQuery(clauses, 0, true); - pay = new BytesRef(("pos: " + 0).getBytes()); - pay2 = new BytesRef(("pos: " + 1).getBytes()); + pay = new BytesRef(("pos: " + 0).getBytes("UTF-8")); + pay2 = new BytesRef(("pos: " + 1).getBytes("UTF-8")); list = new ArrayList(); list.add(pay.bytes); list.add(pay2.bytes); @@ -439,9 +439,9 @@ clauses[1] = term2; clauses[2] = new SpanTermQuery(new Term("field", "five")); snq = new SpanNearQuery(clauses, 0, true); - pay = new BytesRef(("pos: " + 0).getBytes()); - pay2 = new BytesRef(("pos: " + 1).getBytes()); - BytesRef pay3 = new BytesRef(("pos: " + 2).getBytes()); + pay = new BytesRef(("pos: " + 0).getBytes("UTF-8")); + pay2 = new BytesRef(("pos: " + 1).getBytes("UTF-8")); + BytesRef pay3 = new BytesRef(("pos: " + 2).getBytes("UTF-8")); list = new ArrayList(); list.add(pay.bytes); list.add(pay2.bytes); @@ -470,10 +470,10 @@ checkHits(query, new int[]{1103, 1203,1303,1403,1503,1603,1703,1803,1903}); Collection payloads = new ArrayList(); - BytesRef pay = new BytesRef(("pos: " + 0).getBytes()); - BytesRef pay2 = new BytesRef(("pos: " + 1).getBytes()); - BytesRef pay3 = new BytesRef(("pos: " + 3).getBytes()); - BytesRef pay4 = new BytesRef(("pos: " + 4).getBytes()); + BytesRef pay = new BytesRef(("pos: " + 0).getBytes("UTF-8")); + BytesRef pay2 = new BytesRef(("pos: " + 1).getBytes("UTF-8")); + BytesRef pay3 = new BytesRef(("pos: " + 3).getBytes("UTF-8")); + BytesRef pay4 = new BytesRef(("pos: " + 4).getBytes("UTF-8")); 
payloads.add(pay.bytes); payloads.add(pay2.bytes); payloads.add(pay3.bytes); Index: lucene/core/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java (revision 1359190) +++ lucene/core/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java (working copy) @@ -276,7 +276,7 @@ Collection payloads = spans.getPayload(); for (final byte [] payload : payloads) { - payloadSet.add(new String(payload)); + payloadSet.add(new String(payload, "UTF-8")); } } } @@ -311,7 +311,7 @@ while (spans.next()) { Collection payloads = spans.getPayload(); for (final byte[] payload : payloads) { - payloadSet.add(new String(payload)); + payloadSet.add(new String(payload, "UTF-8")); } } } @@ -347,7 +347,7 @@ Collection payloads = spans.getPayload(); for (final byte [] payload : payloads) { - payloadSet.add(new String(payload)); + payloadSet.add(new String(payload, "UTF-8")); } } } @@ -383,7 +383,7 @@ System.out.println("Num payloads:" + payloads.size()); for (final byte [] bytes : payloads) { if(VERBOSE) - System.out.println(new String(bytes)); + System.out.println(new String(bytes, "UTF-8")); } reader.close(); directory.close(); @@ -456,7 +456,7 @@ for (final byte [] bytes : payload) { if(VERBOSE) System.out.println("doc:" + spans.doc() + " s:" + spans.start() + " e:" + spans.end() + " " - + new String(bytes)); + + new String(bytes, "UTF-8")); } assertEquals(numPayloads[cnt],payload.size()); @@ -505,9 +505,9 @@ if (!nopayload.contains(token)) { if (entities.contains(token)) { - payloadAtt.setPayload(new BytesRef((token + ":Entity:"+ pos ).getBytes())); + payloadAtt.setPayload(new BytesRef(token + ":Entity:"+ pos )); } else { - payloadAtt.setPayload(new BytesRef((token + ":Noise:" + pos ).getBytes())); + payloadAtt.setPayload(new BytesRef(token + ":Noise:" + pos )); } } pos += posIncrAtt.getPositionIncrement(); Index: 
lucene/core/src/test/org/apache/lucene/search/TestCustomSearcherSort.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestCustomSearcherSort.java (revision 1359190) +++ lucene/core/src/test/org/apache/lucene/search/TestCustomSearcherSort.java (working copy) @@ -19,8 +19,10 @@ import java.io.IOException; import java.util.Calendar; import java.util.GregorianCalendar; +import java.util.Locale; import java.util.Map; import java.util.Random; +import java.util.TimeZone; import java.util.TreeMap; import org.apache.lucene.document.DateTools; @@ -230,10 +232,12 @@ private class RandomGen { RandomGen(Random random) { this.random = random; + base.set(1980, 1, 1); } private Random random; - private Calendar base = new GregorianCalendar(1980, 1, 1); + // we use the default Locale/TZ since LuceneTestCase randomizes it + private Calendar base = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault()); // Just to generate some different Lucene Date strings private String getLuceneDate() { Index: lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java (revision 1359190) +++ lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java (working copy) @@ -23,6 +23,7 @@ import java.util.Arrays; import java.util.LinkedHashSet; import java.util.List; +import java.util.Locale; import java.util.concurrent.CyclicBarrier; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -117,10 +118,10 @@ try { FieldCache cache = FieldCache.DEFAULT; ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); - cache.setInfoStream(new PrintStream(bos)); + cache.setInfoStream(new PrintStream(bos, false, "UTF-8")); cache.getDoubles(reader, "theDouble", false); cache.getFloats(reader, "theDouble", false); - 
assertTrue(bos.toString().indexOf("WARNING") != -1); + assertTrue(bos.toString("UTF-8").indexOf("WARNING") != -1); } finally { FieldCache.DEFAULT.purgeAllCaches(); } @@ -261,7 +262,7 @@ if (chunk == 0) { for (int ord = 0; ord < values.size(); ord++) { BytesRef term = values.get(ord); - assertNull(String.format("Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term); + assertNull(String.format(Locale.ROOT, "Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term); } break; } @@ -275,7 +276,7 @@ reuse = termOrds.lookup(i, reuse); reuse.read(buffer); } - assertTrue(String.format("Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual)); + assertTrue(String.format(Locale.ROOT, "Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual)); } if (chunk <= buffer.length) { Index: lucene/core/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java (revision 1359190) +++ lucene/core/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java (working copy) @@ -44,7 +44,7 @@ newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMaxBufferedDocs(_TestUtil.nextInt(random(), 50, 1000))); - DecimalFormat format = new DecimalFormat("00000000000", new DecimalFormatSymbols(Locale.US)); + DecimalFormat format = new DecimalFormat("00000000000", new DecimalFormatSymbols(Locale.ROOT)); int num = atLeast(500); for (int l = 0; l < num; l++) { Index: lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom.java (revision 1359190) +++ 
lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom.java (working copy) @@ -58,7 +58,7 @@ Field field = newField("field", "", customType); doc.add(field); - NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ENGLISH)); + NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT)); for (int i = 0; i < 1000; i++) { field.setStringValue(df.format(i)); writer.addDocument(doc); Index: lucene/core/src/test/org/apache/lucene/search/TestWildcardRandom.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestWildcardRandom.java (revision 1359190) +++ lucene/core/src/test/org/apache/lucene/search/TestWildcardRandom.java (working copy) @@ -54,7 +54,7 @@ Field field = newStringField("field", "", Field.Store.NO); doc.add(field); - NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ENGLISH)); + NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT)); for (int i = 0; i < 1000; i++) { field.setStringValue(df.format(i)); writer.addDocument(doc); Index: lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java =================================================================== --- lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java (revision 1359190) +++ lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java (working copy) @@ -1298,7 +1298,7 @@ ord++; if (ord % 500000 == 0) { System.out.println( - String.format(Locale.ENGLISH, + String.format(Locale.ROOT, "%6.2fs: %9d...", ((System.currentTimeMillis() - tStart) / 1000.0), ord)); } if (ord >= limit) { @@ -1637,7 +1637,7 @@ String idString; if (cycle == 0) { // PKs are assigned sequentially - idString = String.format("%07d", id); + idString = String.format(Locale.ROOT, "%07d", id); } else { while(true) { final String s = Long.toString(random().nextLong()); @@ -1668,7 +1668,7 @@ for(int idx=0;idx LOCALE = ConfigurationKey.newInstance(); + 
final public static ConfigurationKey TIMEZONE = ConfigurationKey.newInstance(); + /** * Key used to set the {@link RewriteMethod} used when creating queries * Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/nodes/NumericQueryNode.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/nodes/NumericQueryNode.java (revision 1359190) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/nodes/NumericQueryNode.java (working copy) @@ -90,8 +90,8 @@ * @return the value converte to {@link String} and escaped */ protected CharSequence getTermEscaped(EscapeQuerySyntax escaper) { - return escaper.escape(NumberFormat.getNumberInstance().format(this.value), - Locale.ENGLISH, Type.NORMAL); + return escaper.escape(numberFormat.format(this.value), + Locale.ROOT, Type.NORMAL); } public CharSequence toQueryString(EscapeQuerySyntax escapeSyntaxParser) { Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/CharStream.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/CharStream.java (revision 0) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/CharStream.java (working copy) @@ -0,0 +1,112 @@ +/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */ +/* JavaCCOptions:STATIC=false */ +package org.apache.lucene.queryparser.flexible.standard.parser; + +/** + * This interface describes a character stream that maintains line and + * column number positions of the characters. It also has the capability + * to backup the stream to some extent. An implementation of this + * interface is used in the TokenManager implementation generated by + * JavaCCParser. + * + * All the methods except backup can be implemented in any fashion. 
backup + * needs to be implemented correctly for the correct operation of the lexer. + * Rest of the methods are all used to get information like line number, + * column number and the String that constitutes a token and are not used + * by the lexer. Hence their implementation won't affect the generated lexer's + * operation. + */ + +public interface CharStream { + + /** + * Returns the next character from the selected input. The method + * of selecting the input is the responsibility of the class + * implementing this interface. Can throw any java.io.IOException. + */ + char readChar() throws java.io.IOException; + + /** + * Returns the column position of the character last read. + * @deprecated + * @see #getEndColumn + */ + int getColumn(); + + /** + * Returns the line number of the character last read. + * @deprecated + * @see #getEndLine + */ + int getLine(); + + /** + * Returns the column number of the last character for current token (being + * matched after the last call to BeginTOken). + */ + int getEndColumn(); + + /** + * Returns the line number of the last character for current token (being + * matched after the last call to BeginTOken). + */ + int getEndLine(); + + /** + * Returns the column number of the first character for current token (being + * matched after the last call to BeginTOken). + */ + int getBeginColumn(); + + /** + * Returns the line number of the first character for current token (being + * matched after the last call to BeginTOken). + */ + int getBeginLine(); + + /** + * Backs up the input stream by amount steps. Lexer calls this method if it + * had already read some characters, but could not use them to match a + * (longer) token. So, they will be used again as the prefix of the next + * token and it is the implemetation's responsibility to do this right. + */ + void backup(int amount); + + /** + * Returns the next character that marks the beginning of the next token. 
+ * All characters must remain in the buffer between two successive calls + * to this method to implement backup correctly. + */ + char BeginToken() throws java.io.IOException; + + /** + * Returns a string made up of characters from the marked token beginning + * to the current buffer position. Implementations have the choice of returning + * anything that they want to. For example, for efficiency, one might decide + * to just return null, which is a valid implementation. + */ + String GetImage(); + + /** + * Returns an array of characters that make up the suffix of length 'len' for + * the currently matched token. This is used to build up the matched string + * for use in actions in the case of MORE. A simple and inefficient + * implementation of this is as follows : + * + * { + * String t = GetImage(); + * return t.substring(t.length() - len, t.length()).toCharArray(); + * } + */ + char[] GetSuffix(int len); + + /** + * The lexer calls this function to indicate that it is done with the stream + * and hence implementations can free any resources held by this class. + * Again, the body of this function can be just empty and it will not + * affect the lexer's operation. 
+ */ + void Done(); + +} +/* JavaCC - OriginalChecksum=298ffb3c7c64c6de9b7812e011e58d99 (do not edit this line) */ Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/CharStream.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/CharStream.java (revision 1359191) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/CharStream.java (working copy) Property changes on: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/CharStream.java ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +Date Author Id Revision HeadURL \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/EscapeQuerySyntaxImpl.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/EscapeQuerySyntaxImpl.java (revision 1359190) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/EscapeQuerySyntaxImpl.java (working copy) @@ -50,7 +50,7 @@ // regular escapable Char for terms for (int i = 0; i < escapableTermChars.length; i++) { - buffer = replaceIgnoreCase(buffer, escapableTermChars[i].toLowerCase(), + buffer = replaceIgnoreCase(buffer, escapableTermChars[i].toLowerCase(locale), "\\", locale); } @@ -73,7 +73,7 @@ CharSequence buffer = str; for (int i = 0; i < escapableQuotedChars.length; i++) { - buffer = replaceIgnoreCase(buffer, escapableTermChars[i].toLowerCase(), + buffer = replaceIgnoreCase(buffer, escapableTermChars[i].toLowerCase(locale), "\\", locale); } return buffer; @@ -178,7 +178,7 @@ CharSequence buffer = str; for (int i = 0; i < escapableWhiteChars.length; 
i++) { - buffer = replaceIgnoreCase(buffer, escapableWhiteChars[i].toLowerCase(), + buffer = replaceIgnoreCase(buffer, escapableWhiteChars[i].toLowerCase(locale), "\\", locale); } return buffer; Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java (revision 0) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java (working copy) @@ -0,0 +1,123 @@ +// FastCharStream.java +package org.apache.lucene.queryparser.flexible.standard.parser; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +import java.io.*; + +/** An efficient implementation of JavaCC's CharStream interface.

Note that + * this does not do line-number counting, but instead keeps track of the + * character position of the token in the input, as required by Lucene's {@link + * org.apache.lucene.analysis.Token} API. + * */ +public final class FastCharStream implements CharStream { + char[] buffer = null; + + int bufferLength = 0; // end of valid chars + int bufferPosition = 0; // next char to read + + int tokenStart = 0; // offset in buffer + int bufferStart = 0; // position in file of buffer + + Reader input; // source of chars + + /** Constructs from a Reader. */ + public FastCharStream(Reader r) { + input = r; + } + + public final char readChar() throws IOException { + if (bufferPosition >= bufferLength) + refill(); + return buffer[bufferPosition++]; + } + + private final void refill() throws IOException { + int newPosition = bufferLength - tokenStart; + + if (tokenStart == 0) { // token won't fit in buffer + if (buffer == null) { // first time: alloc buffer + buffer = new char[2048]; + } else if (bufferLength == buffer.length) { // grow buffer + char[] newBuffer = new char[buffer.length*2]; + System.arraycopy(buffer, 0, newBuffer, 0, bufferLength); + buffer = newBuffer; + } + } else { // shift token to front + System.arraycopy(buffer, tokenStart, buffer, 0, newPosition); + } + + bufferLength = newPosition; // update state + bufferPosition = newPosition; + bufferStart += tokenStart; + tokenStart = 0; + + int charsRead = // fill space in buffer + input.read(buffer, newPosition, buffer.length-newPosition); + if (charsRead == -1) + throw new IOException("read past eof"); + else + bufferLength += charsRead; + } + + public final char BeginToken() throws IOException { + tokenStart = bufferPosition; + return readChar(); + } + + public final void backup(int amount) { + bufferPosition -= amount; + } + + public final String GetImage() { + return new String(buffer, tokenStart, bufferPosition - tokenStart); + } + + public final char[] GetSuffix(int len) { + char[] value = new 
char[len]; + System.arraycopy(buffer, bufferPosition - len, value, 0, len); + return value; + } + + public final void Done() { + try { + input.close(); + } catch (IOException e) { + } + } + + public final int getColumn() { + return bufferStart + bufferPosition; + } + public final int getLine() { + return 1; + } + public final int getEndColumn() { + return bufferStart + bufferPosition; + } + public final int getEndLine() { + return 1; + } + public final int getBeginColumn() { + return bufferStart + tokenStart; + } + public final int getBeginLine() { + return 1; + } +} Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java (revision 1359191) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java (working copy) Property changes on: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java ___________________________________________________________________ Added: cvs2svn:cvs-rev ## -0,0 +1 ## +1.3 \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Author Date Id Revision \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/JavaCharStream.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/JavaCharStream.java (revision 1359190) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/JavaCharStream.java (working copy) @@ -1,616 +0,0 @@ -/* Generated By:JavaCC: Do not edit this line. 
JavaCharStream.java Version 4.1 */ -/* JavaCCOptions:STATIC=false */ -package org.apache.lucene.queryparser.flexible.standard.parser; - -/** - * An implementation of interface CharStream, where the stream is assumed to - * contain only ASCII characters (with java-like unicode escape processing). - */ - -public class JavaCharStream -{ -/** Whether parser is static. */ - public static final boolean staticFlag = false; - static final int hexval(char c) throws java.io.IOException { - switch(c) - { - case '0' : - return 0; - case '1' : - return 1; - case '2' : - return 2; - case '3' : - return 3; - case '4' : - return 4; - case '5' : - return 5; - case '6' : - return 6; - case '7' : - return 7; - case '8' : - return 8; - case '9' : - return 9; - - case 'a' : - case 'A' : - return 10; - case 'b' : - case 'B' : - return 11; - case 'c' : - case 'C' : - return 12; - case 'd' : - case 'D' : - return 13; - case 'e' : - case 'E' : - return 14; - case 'f' : - case 'F' : - return 15; - } - - throw new java.io.IOException(); // Should never come here - } - -/** Position in buffer. 
*/ - public int bufpos = -1; - int bufsize; - int available; - int tokenBegin; - protected int bufline[]; - protected int bufcolumn[]; - - protected int column = 0; - protected int line = 1; - - protected boolean prevCharIsCR = false; - protected boolean prevCharIsLF = false; - - protected java.io.Reader inputStream; - - protected char[] nextCharBuf; - protected char[] buffer; - protected int maxNextCharInd = 0; - protected int nextCharInd = -1; - protected int inBuf = 0; - protected int tabSize = 8; - - protected void setTabSize(int i) { tabSize = i; } - protected int getTabSize(int i) { return tabSize; } - - protected void ExpandBuff(boolean wrapAround) - { - char[] newbuffer = new char[bufsize + 2048]; - int newbufline[] = new int[bufsize + 2048]; - int newbufcolumn[] = new int[bufsize + 2048]; - - try - { - if (wrapAround) - { - System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin); - System.arraycopy(buffer, 0, newbuffer, - bufsize - tokenBegin, bufpos); - buffer = newbuffer; - - System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin); - System.arraycopy(bufline, 0, newbufline, bufsize - tokenBegin, bufpos); - bufline = newbufline; - - System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin); - System.arraycopy(bufcolumn, 0, newbufcolumn, bufsize - tokenBegin, bufpos); - bufcolumn = newbufcolumn; - - bufpos += (bufsize - tokenBegin); - } - else - { - System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin); - buffer = newbuffer; - - System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin); - bufline = newbufline; - - System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin); - bufcolumn = newbufcolumn; - - bufpos -= tokenBegin; - } - } - catch (Throwable t) - { - throw new Error(t.getMessage()); - } - - available = (bufsize += 2048); - tokenBegin = 0; - } - - protected void FillBuff() throws java.io.IOException - { - int i; - if (maxNextCharInd == 
4096) - maxNextCharInd = nextCharInd = 0; - - try { - if ((i = inputStream.read(nextCharBuf, maxNextCharInd, - 4096 - maxNextCharInd)) == -1) - { - inputStream.close(); - throw new java.io.IOException(); - } - else - maxNextCharInd += i; - return; - } - catch(java.io.IOException e) { - if (bufpos != 0) - { - --bufpos; - backup(0); - } - else - { - bufline[bufpos] = line; - bufcolumn[bufpos] = column; - } - throw e; - } - } - - protected char ReadByte() throws java.io.IOException - { - if (++nextCharInd >= maxNextCharInd) - FillBuff(); - - return nextCharBuf[nextCharInd]; - } - -/** @return starting character for token. */ - public char BeginToken() throws java.io.IOException - { - if (inBuf > 0) - { - --inBuf; - - if (++bufpos == bufsize) - bufpos = 0; - - tokenBegin = bufpos; - return buffer[bufpos]; - } - - tokenBegin = 0; - bufpos = -1; - - return readChar(); - } - - protected void AdjustBuffSize() - { - if (available == bufsize) - { - if (tokenBegin > 2048) - { - bufpos = 0; - available = tokenBegin; - } - else - ExpandBuff(false); - } - else if (available > tokenBegin) - available = bufsize; - else if ((tokenBegin - available) < 2048) - ExpandBuff(true); - else - available = tokenBegin; - } - - protected void UpdateLineColumn(char c) - { - column++; - - if (prevCharIsLF) - { - prevCharIsLF = false; - line += (column = 1); - } - else if (prevCharIsCR) - { - prevCharIsCR = false; - if (c == '\n') - { - prevCharIsLF = true; - } - else - line += (column = 1); - } - - switch (c) - { - case '\r' : - prevCharIsCR = true; - break; - case '\n' : - prevCharIsLF = true; - break; - case '\t' : - column--; - column += (tabSize - (column % tabSize)); - break; - default : - break; - } - - bufline[bufpos] = line; - bufcolumn[bufpos] = column; - } - -/** Read a character. 
*/ - public char readChar() throws java.io.IOException - { - if (inBuf > 0) - { - --inBuf; - - if (++bufpos == bufsize) - bufpos = 0; - - return buffer[bufpos]; - } - - char c; - - if (++bufpos == available) - AdjustBuffSize(); - - if ((buffer[bufpos] = c = ReadByte()) == '\\') - { - UpdateLineColumn(c); - - int backSlashCnt = 1; - - for (;;) // Read all the backslashes - { - if (++bufpos == available) - AdjustBuffSize(); - - try - { - if ((buffer[bufpos] = c = ReadByte()) != '\\') - { - UpdateLineColumn(c); - // found a non-backslash char. - if ((c == 'u') && ((backSlashCnt & 1) == 1)) - { - if (--bufpos < 0) - bufpos = bufsize - 1; - - break; - } - - backup(backSlashCnt); - return '\\'; - } - } - catch(java.io.IOException e) - { - if (backSlashCnt > 1) - backup(backSlashCnt-1); - - return '\\'; - } - - UpdateLineColumn(c); - backSlashCnt++; - } - - // Here, we have seen an odd number of backslash's followed by a 'u' - try - { - while ((c = ReadByte()) == 'u') - ++column; - - buffer[bufpos] = c = (char)(hexval(c) << 12 | - hexval(ReadByte()) << 8 | - hexval(ReadByte()) << 4 | - hexval(ReadByte())); - - column += 4; - } - catch(java.io.IOException e) - { - throw new Error("Invalid escape character at line " + line + - " column " + column + "."); - } - - if (backSlashCnt == 1) - return c; - else - { - backup(backSlashCnt - 1); - return '\\'; - } - } - else - { - UpdateLineColumn(c); - return c; - } - } - - @Deprecated - /** - * @deprecated - * @see #getEndColumn - */ - public int getColumn() { - return bufcolumn[bufpos]; - } - - @Deprecated - /** - * @deprecated - * @see #getEndLine - */ - public int getLine() { - return bufline[bufpos]; - } - -/** Get end column. */ - public int getEndColumn() { - return bufcolumn[bufpos]; - } - -/** Get end line. 
*/ - public int getEndLine() { - return bufline[bufpos]; - } - -/** @return column of token start */ - public int getBeginColumn() { - return bufcolumn[tokenBegin]; - } - -/** @return line number of token start */ - public int getBeginLine() { - return bufline[tokenBegin]; - } - -/** Retreat. */ - public void backup(int amount) { - - inBuf += amount; - if ((bufpos -= amount) < 0) - bufpos += bufsize; - } - -/** Constructor. */ - public JavaCharStream(java.io.Reader dstream, - int startline, int startcolumn, int buffersize) - { - inputStream = dstream; - line = startline; - column = startcolumn - 1; - - available = bufsize = buffersize; - buffer = new char[buffersize]; - bufline = new int[buffersize]; - bufcolumn = new int[buffersize]; - nextCharBuf = new char[4096]; - } - -/** Constructor. */ - public JavaCharStream(java.io.Reader dstream, - int startline, int startcolumn) - { - this(dstream, startline, startcolumn, 4096); - } - -/** Constructor. */ - public JavaCharStream(java.io.Reader dstream) - { - this(dstream, 1, 1, 4096); - } -/** Reinitialise. */ - public void ReInit(java.io.Reader dstream, - int startline, int startcolumn, int buffersize) - { - inputStream = dstream; - line = startline; - column = startcolumn - 1; - - if (buffer == null || buffersize != buffer.length) - { - available = bufsize = buffersize; - buffer = new char[buffersize]; - bufline = new int[buffersize]; - bufcolumn = new int[buffersize]; - nextCharBuf = new char[4096]; - } - prevCharIsLF = prevCharIsCR = false; - tokenBegin = inBuf = maxNextCharInd = 0; - nextCharInd = bufpos = -1; - } - -/** Reinitialise. */ - public void ReInit(java.io.Reader dstream, - int startline, int startcolumn) - { - ReInit(dstream, startline, startcolumn, 4096); - } - -/** Reinitialise. */ - public void ReInit(java.io.Reader dstream) - { - ReInit(dstream, 1, 1, 4096); - } -/** Constructor. 
*/ - public JavaCharStream(java.io.InputStream dstream, String encoding, int startline, - int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException - { - this(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize); - } - -/** Constructor. */ - public JavaCharStream(java.io.InputStream dstream, int startline, - int startcolumn, int buffersize) - { - this(new java.io.InputStreamReader(dstream), startline, startcolumn, 4096); - } - -/** Constructor. */ - public JavaCharStream(java.io.InputStream dstream, String encoding, int startline, - int startcolumn) throws java.io.UnsupportedEncodingException - { - this(dstream, encoding, startline, startcolumn, 4096); - } - -/** Constructor. */ - public JavaCharStream(java.io.InputStream dstream, int startline, - int startcolumn) - { - this(dstream, startline, startcolumn, 4096); - } - -/** Constructor. */ - public JavaCharStream(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException - { - this(dstream, encoding, 1, 1, 4096); - } - -/** Constructor. */ - public JavaCharStream(java.io.InputStream dstream) - { - this(dstream, 1, 1, 4096); - } - -/** Reinitialise. */ - public void ReInit(java.io.InputStream dstream, String encoding, int startline, - int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException - { - ReInit(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize); - } - -/** Reinitialise. */ - public void ReInit(java.io.InputStream dstream, int startline, - int startcolumn, int buffersize) - { - ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize); - } -/** Reinitialise. 
*/ - public void ReInit(java.io.InputStream dstream, String encoding, int startline, - int startcolumn) throws java.io.UnsupportedEncodingException - { - ReInit(dstream, encoding, startline, startcolumn, 4096); - } -/** Reinitialise. */ - public void ReInit(java.io.InputStream dstream, int startline, - int startcolumn) - { - ReInit(dstream, startline, startcolumn, 4096); - } -/** Reinitialise. */ - public void ReInit(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException - { - ReInit(dstream, encoding, 1, 1, 4096); - } - -/** Reinitialise. */ - public void ReInit(java.io.InputStream dstream) - { - ReInit(dstream, 1, 1, 4096); - } - - /** @return token image as String */ - public String GetImage() - { - if (bufpos >= tokenBegin) - return new String(buffer, tokenBegin, bufpos - tokenBegin + 1); - else - return new String(buffer, tokenBegin, bufsize - tokenBegin) + - new String(buffer, 0, bufpos + 1); - } - - /** @return suffix */ - public char[] GetSuffix(int len) - { - char[] ret = new char[len]; - - if ((bufpos + 1) >= len) - System.arraycopy(buffer, bufpos - len + 1, ret, 0, len); - else - { - System.arraycopy(buffer, bufsize - (len - bufpos - 1), ret, 0, - len - bufpos - 1); - System.arraycopy(buffer, 0, ret, len - bufpos - 1, bufpos + 1); - } - - return ret; - } - - /** Set buffers back to null when finished. */ - public void Done() - { - nextCharBuf = null; - buffer = null; - bufline = null; - bufcolumn = null; - } - - /** - * Method to adjust line and column numbers for the start of a token. 
- */ - public void adjustBeginLineColumn(int newLine, int newCol) - { - int start = tokenBegin; - int len; - - if (bufpos >= tokenBegin) - { - len = bufpos - tokenBegin + inBuf + 1; - } - else - { - len = bufsize - tokenBegin + bufpos + 1 + inBuf; - } - - int i = 0, j = 0, k = 0; - int nextColDiff = 0, columnDiff = 0; - - while (i < len && - bufline[j = start % bufsize] == bufline[k = ++start % bufsize]) - { - bufline[j] = newLine; - nextColDiff = columnDiff + bufcolumn[k] - bufcolumn[j]; - bufcolumn[j] = newCol + columnDiff; - columnDiff = nextColDiff; - i++; - } - - if (i < len) - { - bufline[j] = newLine++; - bufcolumn[j] = newCol + columnDiff; - - while (i++ < len) - { - if (bufline[j = start % bufsize] != bufline[++start % bufsize]) - bufline[j] = newLine++; - else - bufline[j] = newLine; - } - } - - line = bufline[j]; - column = bufcolumn[j]; - } - -} -/* JavaCC - OriginalChecksum=7eecaeeaea1254b3e35fe8890a0127ce (do not edit this line) */ \ No newline at end of file Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/ParseException.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/ParseException.java (revision 1359190) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/ParseException.java (working copy) @@ -193,4 +193,4 @@ } } -/* JavaCC - OriginalChecksum=0f25f4245374bbf9920c9a82efecadd2 (do not edit this line) */ +/* JavaCC - OriginalChecksum=7601d49d11bc059457ae5850628ebc8a (do not edit this line) */ Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java (revision 1359190) +++ 
lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java (working copy) @@ -1,7 +1,7 @@ /* Generated By:JavaCC: Do not edit this line. StandardSyntaxParser.java */ package org.apache.lucene.queryparser.flexible.standard.parser; -/* +/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -49,14 +49,14 @@ // syntax parser constructor public StandardSyntaxParser() { - this(new StringReader("")); + this(new FastCharStream(new StringReader(""))); } /** Parses a query string, returning a {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode}. * @param query the query string to be parsed. * @throws ParseException if the parsing fails */ public QueryNode parse(CharSequence query, CharSequence field) throws QueryNodeParseException { - ReInit(new StringReader(query.toString())); + ReInit(new FastCharStream(new StringReader(query.toString()))); try { // TopLevelQuery is a Query followed by the end-of-input (EOF) QueryNode querynode = TopLevelQuery(field); @@ -844,7 +844,6 @@ /** Generated Token Manager. */ public StandardSyntaxParserTokenManager token_source; - JavaCharStream jj_input_stream; /** Current token. */ public Token token; /** Next token. */ @@ -870,14 +869,9 @@ private boolean jj_rescan = false; private int jj_gc = 0; - /** Constructor with InputStream. 
*/ - public StandardSyntaxParser(java.io.InputStream stream) { - this(stream, null); - } - /** Constructor with InputStream and supplied encoding */ - public StandardSyntaxParser(java.io.InputStream stream, String encoding) { - try { jj_input_stream = new JavaCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); } - token_source = new StandardSyntaxParserTokenManager(jj_input_stream); + /** Constructor with user supplied CharStream. */ + public StandardSyntaxParser(CharStream stream) { + token_source = new StandardSyntaxParserTokenManager(stream); token = new Token(); jj_ntk = -1; jj_gen = 0; @@ -886,13 +880,8 @@ } /** Reinitialise. */ - public void ReInit(java.io.InputStream stream) { - ReInit(stream, null); - } - /** Reinitialise. */ - public void ReInit(java.io.InputStream stream, String encoding) { - try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); } - token_source.ReInit(jj_input_stream); + public void ReInit(CharStream stream) { + token_source.ReInit(stream); token = new Token(); jj_ntk = -1; jj_gen = 0; @@ -900,28 +889,6 @@ for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); } - /** Constructor. */ - public StandardSyntaxParser(java.io.Reader stream) { - jj_input_stream = new JavaCharStream(stream, 1, 1); - token_source = new StandardSyntaxParserTokenManager(jj_input_stream); - token = new Token(); - jj_ntk = -1; - jj_gen = 0; - for (int i = 0; i < 28; i++) jj_la1[i] = -1; - for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); - } - - /** Reinitialise. 
*/ - public void ReInit(java.io.Reader stream) { - jj_input_stream.ReInit(stream, 1, 1); - token_source.ReInit(jj_input_stream); - token = new Token(); - jj_ntk = -1; - jj_gen = 0; - for (int i = 0; i < 28; i++) jj_la1[i] = -1; - for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); - } - /** Constructor with generated Token Manager. */ public StandardSyntaxParser(StandardSyntaxParserTokenManager tm) { token_source = tm; Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj (revision 1359190) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj (working copy) @@ -5,7 +5,7 @@ options { STATIC=false; JAVA_UNICODE_ESCAPE=true; - USER_CHAR_STREAM=false; + USER_CHAR_STREAM=true; IGNORE_CASE=false; JDK_VERSION="1.5"; } @@ -61,14 +61,14 @@ // syntax parser constructor public StandardSyntaxParser() { - this(new StringReader("")); + this(new FastCharStream(new StringReader(""))); } /** Parses a query string, returning a {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode}. * @param query the query string to be parsed. 
* @throws ParseException if the parsing fails */ public QueryNode parse(CharSequence query, CharSequence field) throws QueryNodeParseException { - ReInit(new StringReader(query.toString())); + ReInit(new FastCharStream(new StringReader(query.toString()))); try { // TopLevelQuery is a Query followed by the end-of-input (EOF) QueryNode querynode = TopLevelQuery(field); Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java (revision 1359190) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java (working copy) @@ -1,6 +1,6 @@ /* Generated By:JavaCC: Do not edit this line. StandardSyntaxParserTokenManager.java */ package org.apache.lucene.queryparser.flexible.standard.parser; -/* +/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -781,25 +781,23 @@ static final long[] jjtoSkip = { 0x80L, }; -protected JavaCharStream input_stream; +protected CharStream input_stream; private final int[] jjrounds = new int[33]; private final int[] jjstateSet = new int[66]; protected char curChar; /** Constructor. */ -public StandardSyntaxParserTokenManager(JavaCharStream stream){ - if (JavaCharStream.staticFlag) - throw new Error("ERROR: Cannot use a static CharStream class with a non-static lexical analyzer."); +public StandardSyntaxParserTokenManager(CharStream stream){ input_stream = stream; } /** Constructor. 
*/ -public StandardSyntaxParserTokenManager(JavaCharStream stream, int lexState){ +public StandardSyntaxParserTokenManager(CharStream stream, int lexState){ this(stream); SwitchTo(lexState); } /** Reinitialise parser. */ -public void ReInit(JavaCharStream stream) +public void ReInit(CharStream stream) { jjmatchedPos = jjnewStateCnt = 0; curLexState = defaultLexState; @@ -815,7 +813,7 @@ } /** Reinitialise parser. */ -public void ReInit(JavaCharStream stream, int lexState) +public void ReInit(CharStream stream, int lexState) { ReInit(stream); SwitchTo(lexState); Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/Token.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/Token.java (revision 1359190) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/Token.java (working copy) @@ -121,4 +121,4 @@ } } -/* JavaCC - OriginalChecksum=e9c55091ec11152bcd3a300ddff5c73a (do not edit this line) */ +/* JavaCC - OriginalChecksum=3b4fe6dcfcfa24a81f1c6ceffae5f73a (do not edit this line) */ Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/TokenMgrError.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/TokenMgrError.java (revision 1359190) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/TokenMgrError.java (working copy) @@ -138,4 +138,4 @@ this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason); } } -/* JavaCC - OriginalChecksum=76b513fd9c50f65248056bbeeff49277 (do not edit this line) */ +/* JavaCC - OriginalChecksum=1efb3d906925f2478637c66473b79bae (do not edit this line) */ Index: 
lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/LowercaseExpandedTermsQueryNodeProcessor.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/LowercaseExpandedTermsQueryNodeProcessor.java (revision 1359190) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/LowercaseExpandedTermsQueryNodeProcessor.java (working copy) @@ -18,6 +18,7 @@ */ import java.util.List; +import java.util.Locale; import org.apache.lucene.queryparser.flexible.core.QueryNodeException; import org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler; @@ -46,7 +47,6 @@ QueryNodeProcessorImpl { public LowercaseExpandedTermsQueryNodeProcessor() { - // empty constructor } @Override @@ -63,6 +63,11 @@ @Override protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException { + + Locale locale = getQueryConfigHandler().get(ConfigurationKeys.LOCALE); + if (locale == null) { + locale = Locale.getDefault(); + } if (node instanceof WildcardQueryNode || node instanceof FuzzyQueryNode @@ -71,7 +76,7 @@ TextableQueryNode txtNode = (TextableQueryNode) node; CharSequence text = txtNode.getText(); - txtNode.setText(text != null ? UnescapedCharSequence.toLowerCase(text) : null); + txtNode.setText(text != null ? 
UnescapedCharSequence.toLowerCase(text, locale) : null); } return node; Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/StandardQueryNodeProcessorPipeline.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/StandardQueryNodeProcessorPipeline.java (revision 1359190) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/StandardQueryNodeProcessorPipeline.java (working copy) @@ -17,12 +17,15 @@ * limitations under the License. */ +import java.util.Locale; + import org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler; import org.apache.lucene.queryparser.flexible.core.processors.NoChildOptimizationQueryNodeProcessor; import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorPipeline; import org.apache.lucene.queryparser.flexible.core.processors.RemoveDeletedQueryNodesProcessor; import org.apache.lucene.queryparser.flexible.standard.builders.StandardQueryTreeBuilder; import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler; +import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys; import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser; import org.apache.lucene.search.Query; Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/TermRangeQueryNodeProcessor.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/TermRangeQueryNodeProcessor.java (revision 1359190) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/TermRangeQueryNodeProcessor.java (working copy) @@ -22,6 +22,7 @@ import java.util.Date; import java.util.List; import java.util.Locale; 
+import java.util.TimeZone; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.DateTools.Resolution; @@ -76,6 +77,12 @@ locale = Locale.getDefault(); } + TimeZone timeZone = getQueryConfigHandler().get(ConfigurationKeys.TIMEZONE); + + if (timeZone == null) { + timeZone = TimeZone.getDefault(); + } + CharSequence field = termRangeNode.getField(); String fieldStr = null; @@ -114,7 +121,7 @@ // the time is set to the latest possible time of that date to // really // include all documents: - Calendar cal = Calendar.getInstance(locale); + Calendar cal = Calendar.getInstance(timeZone, locale); cal.setTime(d2); cal.set(Calendar.HOUR_OF_DAY, 23); cal.set(Calendar.MINUTE, 59); Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/StandardQueryParser.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/StandardQueryParser.java (revision 1359190) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/StandardQueryParser.java (working copy) @@ -19,6 +19,7 @@ import java.util.Locale; import java.util.Map; +import java.util.TimeZone; import java.util.TooManyListenersException; import org.apache.lucene.analysis.Analyzer; @@ -344,6 +345,14 @@ return getQueryConfigHandler().get(ConfigurationKeys.LOCALE); } + public void setTimeZone(TimeZone timeZone) { + getQueryConfigHandler().set(ConfigurationKeys.TIMEZONE, timeZone); + } + + public TimeZone getTimeZone() { + return getQueryConfigHandler().get(ConfigurationKeys.TIMEZONE); + } + /** * Sets the default slop for phrases. If zero, then exact phrase matches are * required. Default value is zero. 
Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/FastCharStream.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/FastCharStream.java (revision 1359190) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/FastCharStream.java (working copy) @@ -96,7 +96,6 @@ try { input.close(); } catch (IOException e) { - System.err.println("Caught: " + e + "; ignoring."); } } Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java (revision 1359190) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java (working copy) @@ -11,7 +11,7 @@ import javax.xml.transform.dom.DOMResult; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; -import java.io.ByteArrayOutputStream; +import java.io.StringWriter; import java.io.IOException; import java.io.InputStream; import java.util.Enumeration; @@ -98,10 +98,11 @@ */ public static String getQueryAsXmlString(Properties formProperties, Templates template) throws ParserConfigurationException, TransformerException { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - StreamResult result = new StreamResult(baos); + // TODO: Suppress XML header with encoding (as Strings have no encoding) + StringWriter writer = new StringWriter(); + StreamResult result = new StreamResult(writer); transformCriteria(formProperties, template, result); - return baos.toString(); + return writer.toString(); } /** @@ -109,10 +110,11 @@ */ public static String getQueryAsXmlString(Properties formProperties, InputStream xslIs) throws SAXException, IOException, ParserConfigurationException, TransformerException { - ByteArrayOutputStream baos = 
new ByteArrayOutputStream(); - StreamResult result = new StreamResult(baos); + // TODO: Suppress XML header with encoding (as Strings have no encoding) + StringWriter writer = new StringWriter(); + StreamResult result = new StreamResult(writer); transformCriteria(formProperties, xslIs, result); - return baos.toString(); + return writer.toString(); } Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java =================================================================== --- lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java (revision 1359190) +++ lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java (working copy) @@ -17,6 +17,8 @@ * limitations under the License. */ +import java.util.Locale; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; @@ -70,7 +72,7 @@ ExtendableQueryParser parser = (ExtendableQueryParser) getParser(null, ext); String field = ext.buildExtensionField("testExt", "aField"); - Query query = parser.parse(String.format("%s:foo bar", field)); + Query query = parser.parse(String.format(Locale.ROOT, "%s:foo bar", field)); assertTrue("expected instance of BooleanQuery but was " + query.getClass(), query instanceof BooleanQuery); BooleanQuery bquery = (BooleanQuery) query; @@ -102,7 +104,7 @@ ExtendableQueryParser parser = (ExtendableQueryParser) getParser(null, ext); String field = ext.buildExtensionField("testExt"); - Query parse = parser.parse(String.format("%s:\"foo \\& bar\"", field)); + Query parse = parser.parse(String.format(Locale.ROOT, "%s:\"foo \\& bar\"", field)); assertTrue("expected instance of TermQuery but was " + parse.getClass(), parse instanceof TermQuery); TermQuery tquery = (TermQuery) parse; @@ -122,7 +124,7 @@ ExtendableQueryParser parser = (ExtendableQueryParser) getParser(null, ext); String field = 
ext.buildExtensionField("testExt", "afield"); - Query parse = parser.parse(String.format("%s:\"foo \\& bar\"", field)); + Query parse = parser.parse(String.format(Locale.ROOT, "%s:\"foo \\& bar\"", field)); assertTrue("expected instance of TermQuery but was " + parse.getClass(), parse instanceof TermQuery); TermQuery tquery = (TermQuery) parse; Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java =================================================================== --- lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java (revision 1359190) +++ lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java (working copy) @@ -24,7 +24,9 @@ import java.util.Date; import java.util.GregorianCalendar; import java.util.HashMap; +import java.util.Locale; import java.util.Map; +import java.util.TimeZone; import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; @@ -385,14 +387,16 @@ } public String getDate(String s) throws Exception { - DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT); + // we use the default Locale since LuceneTestCase randomizes it + DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, Locale.getDefault()); return DateTools.dateToString(df.parse(s), DateTools.Resolution.DAY); } private String getLocalizedDate(int year, int month, int day, boolean extendLastDate) { - DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT); - Calendar calendar = new GregorianCalendar(); + // we use the default Locale/TZ since LuceneTestCase randomizes it + DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, Locale.getDefault()); + Calendar calendar = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault()); calendar.set(year, month, day); if (extendLastDate) { calendar.set(Calendar.HOUR_OF_DAY, 23); @@ -406,7 +410,8 @@ public 
void testDateRange() throws Exception { String startDate = getLocalizedDate(2002, 1, 1, false); String endDate = getLocalizedDate(2002, 1, 4, false); - Calendar endDateExpected = new GregorianCalendar(); + // we use the default Locale/TZ since LuceneTestCase randomizes it + Calendar endDateExpected = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault()); endDateExpected.set(2002, 1, 4, 23, 59, 59); endDateExpected.set(Calendar.MILLISECOND, 999); final String defaultField = "default"; @@ -441,7 +446,8 @@ /** for testing DateTools support */ private String getDate(String s, DateTools.Resolution resolution) throws Exception { - DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT); + // we use the default Locale since LuceneTestCase randomizes it + DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, Locale.getDefault()); return getDate(df.parse(s), resolution); } Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java =================================================================== --- lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java (revision 1359190) +++ lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java (working copy) @@ -27,6 +27,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.TimeZone; import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -675,7 +676,8 @@ /** for testing DateTools support */ private String getDate(String s, DateTools.Resolution resolution) throws Exception { - DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT); + // we use the default Locale since LuceneTestCase randomizes it + DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, Locale.getDefault()); return getDate(df.parse(s), resolution); } @@ -693,8 +695,9 @@ } private String getLocalizedDate(int year, int month, 
int day) { - DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT); - Calendar calendar = new GregorianCalendar(); + // we use the default Locale/TZ since LuceneTestCase randomizes it + DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, Locale.getDefault()); + Calendar calendar = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault()); calendar.clear(); calendar.set(year, month, day); calendar.set(Calendar.HOUR_OF_DAY, 23); @@ -707,7 +710,8 @@ public void testDateRange() throws Exception { String startDate = getLocalizedDate(2002, 1, 1); String endDate = getLocalizedDate(2002, 1, 4); - Calendar endDateExpected = new GregorianCalendar(); + // we use the default Locale/TZ since LuceneTestCase randomizes it + Calendar endDateExpected = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault()); endDateExpected.clear(); endDateExpected.set(2002, 1, 4, 23, 59, 59); endDateExpected.set(Calendar.MILLISECOND, 999); Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java =================================================================== --- lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java (revision 1359190) +++ lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java (working copy) @@ -24,6 +24,7 @@ import java.util.Date; import java.util.GregorianCalendar; import java.util.Locale; +import java.util.TimeZone; import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -581,7 +582,8 @@ /** for testing DateTools support */ private String getDate(String s, DateTools.Resolution resolution) throws Exception { - DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT); + // we use the default Locale since LuceneTestCase randomizes it + DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, Locale.getDefault()); return getDate(df.parse(s), resolution); } @@ -591,8 +593,9 @@ } 
private String getLocalizedDate(int year, int month, int day) { - DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT); - Calendar calendar = new GregorianCalendar(); + // we use the default Locale/TZ since LuceneTestCase randomizes it + DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, Locale.getDefault()); + Calendar calendar = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault()); calendar.clear(); calendar.set(year, month, day); calendar.set(Calendar.HOUR_OF_DAY, 23); @@ -605,7 +608,8 @@ public void testDateRange() throws Exception { String startDate = getLocalizedDate(2002, 1, 1); String endDate = getLocalizedDate(2002, 1, 4); - Calendar endDateExpected = new GregorianCalendar(); + // we use the default Locale/TZ since LuceneTestCase randomizes it + Calendar endDateExpected = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault()); endDateExpected.clear(); endDateExpected.set(2002, 1, 4, 23, 59, 59); endDateExpected.set(Calendar.MILLISECOND, 999); Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java =================================================================== --- lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java (revision 1359190) +++ lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java (working copy) @@ -203,7 +203,7 @@ private static Document getDocumentFromString(String str) throws SAXException, IOException, ParserConfigurationException { - InputStream is = new ByteArrayInputStream(str.getBytes()); + InputStream is = new ByteArrayInputStream(str.getBytes("UTF-8")); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setNamespaceAware(true); DocumentBuilder builder = factory.newDocumentBuilder(); Index: lucene/spatial =================================================================== --- lucene/spatial (revision 
1359190) +++ lucene/spatial (working copy) Property changes on: lucene/spatial ___________________________________________________________________ Added: svn:mergeinfo Merged /lucene/dev/branches/lucene4199/lucene/spatial:r1358548-1359191 Merged /lucene/dev/branches/lucene3969/lucene/spatial:r1311219-1324948 Merged /lucene/dev/branches/branch_3x/lucene/spatial:r1232954,1302749,1302808,1303007,1303023,1303269,1303733,1303854,1304295,1304360,1304660,1304904,1305074,1305142,1305681,1305693,1305719,1305741,1305816,1305837,1306929,1307050 Merged /lucene/dev/branches/branch_4x/lucene/spatial:r1344391,1344929,1348012,1348274,1348293,1348919,1348951,1349048,1349340,1349446,1349991,1353701,1355203,1356608 Merged /lucene/dev/branches/lucene4055/lucene/spatial:r1338960-1343359 Index: lucene/spatial/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java (revision 1359190) +++ lucene/spatial/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java (working copy) @@ -122,7 +122,7 @@ ff.setIndexOptions(IndexOptions.DOCS_ONLY); ff.freeze(); - NumberFormat nf = NumberFormat.getInstance( Locale.US ); + NumberFormat nf = NumberFormat.getInstance( Locale.ROOT ); nf.setMaximumFractionDigits( 5 ); nf.setMinimumFractionDigits( 5 ); nf.setGroupingUsed(false); Index: lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java (revision 1359190) +++ lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/QuadPrefixTree.java (working copy) @@ -28,6 +28,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Locale; /** * @lucene.experimental @@ -107,7 +108,7 @@ } public void printInfo() { - NumberFormat nf = 
NumberFormat.getNumberInstance(); + NumberFormat nf = NumberFormat.getNumberInstance(Locale.ROOT); nf.setMaximumFractionDigits(5); nf.setMinimumFractionDigits(5); nf.setMinimumIntegerDigits(3); Index: lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgs.java =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgs.java (revision 1359190) +++ lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgs.java (working copy) @@ -17,6 +17,8 @@ * limitations under the License. */ +import java.util.Locale; + import com.spatial4j.core.exception.InvalidSpatialArgument; import com.spatial4j.core.shape.Shape; @@ -64,7 +66,7 @@ if (max != null) { str.append(" max=").append(max); } - str.append(" distPrec=").append(String.format("%.2f%%", distPrecision / 100d)); + str.append(" distPrec=").append(String.format(Locale.ROOT, "%.2f%%", distPrecision / 100d)); str.append(')'); return str.toString(); } Index: lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialOperation.java =================================================================== --- lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialOperation.java (revision 1359190) +++ lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialOperation.java (working copy) @@ -61,14 +61,14 @@ this.sourceNeedsArea = sourceNeedsArea; this.targetNeedsArea = targetNeedsArea; registry.put(name, this); - registry.put(name.toUpperCase(Locale.US), this); + registry.put(name.toUpperCase(Locale.ROOT), this); list.add( this ); } public static SpatialOperation get( String v ) { SpatialOperation op = registry.get( v ); if( op == null ) { - op = registry.get(v.toUpperCase(Locale.US)); + op = registry.get(v.toUpperCase(Locale.ROOT)); } if( op == null ) { throw new InvalidSpatialArgument("Unknown Operation: " + v ); Index: lucene/suggest =================================================================== --- lucene/suggest 
(revision 1359190) +++ lucene/suggest (working copy) Property changes on: lucene/suggest ___________________________________________________________________ Added: svn:mergeinfo Merged /lucene/dev/branches/lucene3969/lucene/suggest:r1311219-1324948 Merged /lucene/dev/branches/branch_3x/lucene/suggest:r1232954,1302749,1302808,1303007,1303023,1303269,1303733,1303854,1304295,1304360,1304660,1304904,1305074,1305142,1305681,1305693,1305719,1305741,1305816,1305837,1306929,1307050 Merged /lucene/dev/branches/branch_4x/lucene/suggest:r1344391,1344929,1348012,1348274,1348293,1348919,1348951,1349048,1349340,1349446,1349991,1353701,1355203,1356608 Merged /lucene/dev/branches/lucene4055/lucene/suggest:r1338960-1343359 Merged /lucene/dev/branches/lucene4199/lucene/suggest:r1358548-1359191 Index: lucene/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java (revision 1359190) +++ lucene/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java (working copy) @@ -320,7 +320,7 @@ return new SuggestWord[0]; if (lowerCaseTerms) { - term = new Term(term.field(), text.toLowerCase(Locale.ENGLISH)); + term = new Term(term.field(), text.toLowerCase(Locale.ROOT)); } int docfreq = ir.docFreq(term); Index: lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java (revision 1359190) +++ lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java (working copy) @@ -38,12 +38,12 @@ private BufferedReader in; - public PlainTextDictionary(File file) throws FileNotFoundException { - in = new BufferedReader(new FileReader(file)); + public PlainTextDictionary(File file) throws IOException { + in = new 
BufferedReader(IOUtils.getDecodingReader(file, IOUtils.CHARSET_UTF_8)); } public PlainTextDictionary(InputStream dictFile) { - in = new BufferedReader(new InputStreamReader(dictFile)); + in = new BufferedReader(IOUtils.getDecodingReader(dictFile, IOUtils.CHARSET_UTF_8)); } /** Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java (revision 1359190) +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java (working copy) @@ -42,7 +42,7 @@ private boolean done = false; public FileDictionary(InputStream dictFile) { - in = new BufferedReader(new InputStreamReader(dictFile)); + in = new BufferedReader(IOUtils.getDecodingReader(dictFile, IOUtils.CHARSET_UTF_8)); } /** Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java (revision 1359190) +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java (working copy) @@ -132,7 +132,7 @@ @Override public String toString() { - return String.format(Locale.ENGLISH, + return String.format(Locale.ROOT, "time=%.2f sec. 
total (%.2f reading, %.2f sorting, %.2f merging), lines=%d, temp files=%d, merges=%d, soft ram limit=%.2f MB", totalTime / 1000.0d, readTime / 1000.0d, sortTime / 1000.0d, mergeTime / 1000.0d, lines, tempMergeFiles, mergeRounds, Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java (revision 1359190) +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java (working copy) @@ -35,9 +35,12 @@ import java.io.IOException; import java.io.InputStreamReader; import java.util.List; +import java.util.Locale; import java.util.Vector; import java.util.zip.GZIPInputStream; +import org.apache.lucene.util.IOUtils; + /** * Implementation of a Ternary Search Trie, a data structure for storing * String objects that combines the compact size of a binary search @@ -147,13 +150,20 @@ /** The base node in the trie. */ private TSTNode rootNode; + + private final Locale locale; /** * Constructs an empty Ternary Search Trie. 
*/ public JaspellTernarySearchTrie() { + this(Locale.ROOT); } + public JaspellTernarySearchTrie(Locale locale) { + this.locale = locale; + } + // for loading void setRoot(TSTNode newRoot) { rootNode = newRoot; @@ -196,10 +206,10 @@ this(); BufferedReader in; if (compression) - in = new BufferedReader(new InputStreamReader(new GZIPInputStream( - new FileInputStream(file)))); - else in = new BufferedReader(new InputStreamReader((new FileInputStream( - file)))); + in = new BufferedReader(IOUtils.getDecodingReader(new GZIPInputStream( + new FileInputStream(file)), IOUtils.CHARSET_UTF_8)); + else in = new BufferedReader(IOUtils.getDecodingReader((new FileInputStream( + file)), IOUtils.CHARSET_UTF_8)); String word; int pos; Float occur, one = new Float(1); @@ -212,7 +222,7 @@ occur = Float.parseFloat(word.substring(pos + 1).trim()); word = word.substring(0, pos); } - String key = word.toLowerCase(); + String key = word.toLowerCase(locale); if (rootNode == null) { rootNode = new TSTNode(key.charAt(0), null); } @@ -242,7 +252,7 @@ if (occur2 != null) { occur += occur2.floatValue(); } - currentNode = getOrCreateNode(word.trim().toLowerCase()); + currentNode = getOrCreateNode(word.trim().toLowerCase(locale)); currentNode.data = occur; } } @@ -382,7 +392,7 @@ *@return The Float retrieved from the Ternary Search Trie. */ public Float getAndIncrement(String key) { - String key2 = key.trim().toLowerCase(); + String key2 = key.trim().toLowerCase(locale); TSTNode node = getNode(key2); if (node == null) { return null; @@ -763,7 +773,7 @@ * the Trie. 
*/ public void remove(String key) { - deleteNode(getNode(key.trim().toLowerCase())); + deleteNode(getNode(key.trim().toLowerCase(locale))); } /** Index: lucene/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java =================================================================== --- lucene/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java (revision 1359190) +++ lucene/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java (working copy) @@ -22,6 +22,7 @@ import java.util.Collections; import java.util.Comparator; import java.util.List; +import java.util.Locale; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; @@ -435,8 +436,8 @@ executor.awaitTermination(60L, TimeUnit.SECONDS); for (int i = 0; i < workers.length; i++) { - assertFalse(String.format("worker thread %d failed", i), workers[i].failed); - assertTrue(String.format("worker thread %d is still running but should be terminated", i), workers[i].terminated); + assertFalse(String.format(Locale.ROOT, "worker thread %d failed", i), workers[i].failed); + assertTrue(String.format(Locale.ROOT, "worker thread %d is still running but should be terminated", i), workers[i].terminated); } // 4 searchers more than iterations // 1. 
at creation Index: lucene/suggest/src/test/org/apache/lucene/search/suggest/Average.java =================================================================== --- lucene/suggest/src/test/org/apache/lucene/search/suggest/Average.java (revision 1359190) +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/Average.java (working copy) @@ -47,7 +47,7 @@ public String toString() { - return String.format(Locale.ENGLISH, "%.0f [+- %.2f]", + return String.format(Locale.ROOT, "%.0f [+- %.2f]", avg, stddev); } Index: lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java =================================================================== --- lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java (revision 1359190) +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java (working copy) @@ -233,9 +233,9 @@ StringBuilder b = new StringBuilder(); String format = "%" + colLen + "s " + "%" + colLen + "s\n"; - b.append(String.format(Locale.ENGLISH, format, "Expected", "Result")); + b.append(String.format(Locale.ROOT, format, "Expected", "Result")); for (int i = 0; i < Math.max(result.length, expected.length); i++) { - b.append(String.format(Locale.ENGLISH, format, + b.append(String.format(Locale.ROOT, format, i < expected.length ? expected[i] : "--", i < result.length ? 
result[i] : "--")); } Index: lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java =================================================================== --- lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java (revision 1359190) +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java (working copy) @@ -119,7 +119,7 @@ }); System.err.println( - String.format(Locale.ENGLISH, "%-15s input: %d, time[ms]: %s", + String.format(Locale.ROOT, "%-15s input: %d, time[ms]: %s", cls.getSimpleName(), dictionaryInput.length, result.average.toString())); @@ -134,7 +134,7 @@ for (Class cls : benchmarkClasses) { Lookup lookup = buildLookup(cls, dictionaryInput); System.err.println( - String.format(Locale.ENGLISH, "%-15s size[B]:%,13d", + String.format(Locale.ROOT, "%-15s size[B]:%,13d", lookup.getClass().getSimpleName(), RamUsageEstimator.sizeOf(lookup))); } @@ -181,7 +181,7 @@ */ public void runPerformanceTest(final int minPrefixLen, final int maxPrefixLen, final int num, final boolean onlyMorePopular) throws Exception { - System.err.println(String.format(Locale.ENGLISH, + System.err.println(String.format(Locale.ROOT, "-- prefixes: %d-%d, num: %d, onlyMorePopular: %s", minPrefixLen, maxPrefixLen, num, onlyMorePopular)); @@ -206,7 +206,7 @@ }); System.err.println( - String.format(Locale.ENGLISH, "%-15s queries: %d, time[ms]: %s, ~kQPS: %.0f", + String.format(Locale.ROOT, "%-15s queries: %d, time[ms]: %s, ~kQPS: %.0f", lookup.getClass().getSimpleName(), input.size(), result.average.toString(), Index: lucene/test-framework =================================================================== --- lucene/test-framework (revision 1359190) +++ lucene/test-framework (working copy) Property changes on: lucene/test-framework ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/lucene4199/lucene/test-framework:r1358548-1359191 Index: 
lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (revision 1359190) +++ lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (working copy) @@ -595,7 +595,7 @@ } else { // TODO: we can make ascii easier to read if we // don't escape... - sb.append(String.format("\\u%04x", c)); + sb.append(String.format(Locale.ROOT, "\\u%04x", c)); } charUpto += Character.charCount(c); } Index: lucene/test-framework/src/java/org/apache/lucene/index/AlcoholicMergePolicy.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/index/AlcoholicMergePolicy.java (revision 1359190) +++ lucene/test-framework/src/java/org/apache/lucene/index/AlcoholicMergePolicy.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.Calendar; import java.util.GregorianCalendar; +import java.util.Locale; import java.util.Random; import java.util.TimeZone; @@ -45,7 +46,7 @@ private final Calendar calendar; public AlcoholicMergePolicy(TimeZone tz, Random random) { - this.calendar = new GregorianCalendar(tz); + this.calendar = new GregorianCalendar(tz, Locale.ROOT); this.random = random; maxMergeSize = _TestUtil.nextInt(random, 1024*1024, Integer.MAX_VALUE); } Index: lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java (revision 1359190) +++ lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java (working copy) @@ -71,7 +71,7 @@ codec = formats.get(Math.abs(perFieldSeed ^ name.hashCode()) % formats.size()); if (codec instanceof SimpleTextPostingsFormat && perFieldSeed % 5 != 0) { // make simpletext rarer, choose 
again - codec = formats.get(Math.abs(perFieldSeed ^ name.toUpperCase(Locale.ENGLISH).hashCode()) % formats.size()); + codec = formats.get(Math.abs(perFieldSeed ^ name.toUpperCase(Locale.ROOT).hashCode()) % formats.size()); } previousMappings.put(name, codec); // Safety: Index: lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java (revision 1359190) +++ lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java (working copy) @@ -357,7 +357,7 @@ // - end with one of: "product of:", "sum of:", "max of:", or // - have "max plus times others" (where is float). float x = 0; - String descr = expl.getDescription().toLowerCase(Locale.ENGLISH); + String descr = expl.getDescription().toLowerCase(Locale.ROOT); boolean productOf = descr.endsWith("product of:"); boolean sumOf = descr.endsWith("sum of:"); boolean maxOf = descr.endsWith("max of:"); Index: lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java (revision 1359190) +++ lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java (working copy) @@ -187,15 +187,15 @@ ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); CheckIndex checker = new CheckIndex(dir); checker.setCrossCheckTermVectors(crossCheckTermVectors); - checker.setInfoStream(new PrintStream(bos), false); + checker.setInfoStream(new PrintStream(bos, false, "UTF-8"), false); CheckIndex.Status indexStatus = checker.checkIndex(null); if (indexStatus == null || indexStatus.clean == false) { System.out.println("CheckIndex failed"); - System.out.println(bos.toString()); + System.out.println(bos.toString("UTF-8")); throw new RuntimeException("CheckIndex failed"); } else { if (LuceneTestCase.INFOSTREAM) { - 
System.out.println(bos.toString()); + System.out.println(bos.toString("UTF-8")); } return indexStatus; } Index: lucene/test-framework/src/java/org/apache/lucene/util/TestRuleIcuHack.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/util/TestRuleIcuHack.java (revision 1359190) +++ lucene/test-framework/src/java/org/apache/lucene/util/TestRuleIcuHack.java (working copy) @@ -39,7 +39,7 @@ if (!icuTested.getAndSet(true)) { Locale previous = Locale.getDefault(); try { - Locale.setDefault(Locale.US); + Locale.setDefault(Locale.ROOT); Class.forName("com.ibm.icu.util.ULocale"); } catch (ClassNotFoundException cnfe) { // ignore if no ICU is in classpath Index: lucene/tools =================================================================== --- lucene/tools (revision 1359190) +++ lucene/tools (working copy) Property changes on: lucene/tools ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/lucene4199/lucene/tools:r1358548-1359191 Index: lucene/tools/build.xml =================================================================== --- lucene/tools/build.xml (revision 1359190) +++ lucene/tools/build.xml (working copy) @@ -24,7 +24,11 @@ - + + + + + - + + + Index: lucene/tools/custom-tasks.xml =================================================================== --- lucene/tools/custom-tasks.xml (revision 1359190) +++ lucene/tools/custom-tasks.xml (working copy) @@ -5,19 +5,23 @@ for standalone use. 
+ + + + + + + + + + + - - - - - - - License check under: @{dir} Index: lucene/tools/forbiddenApis/commons-io.txt =================================================================== --- lucene/tools/forbiddenApis/commons-io.txt (revision 0) +++ lucene/tools/forbiddenApis/commons-io.txt (working copy) @@ -0,0 +1,20 @@ +# These methods and classes from commons-io should not be used by Solr classes (unsafe, no charset,...): + +org.apache.commons.io.IOUtils#copy(java.io.InputStream,java.io.Writer) +org.apache.commons.io.IOUtils#copy(java.io.Reader,java.io.OutputStream) +org.apache.commons.io.IOUtils#readLines(java.io.InputStream) +org.apache.commons.io.IOUtils#toByteArray(java.io.Reader) +org.apache.commons.io.IOUtils#toByteArray(java.lang.String) +org.apache.commons.io.IOUtils#toCharArray(java.io.InputStream) +org.apache.commons.io.IOUtils#toInputStream(java.lang.CharSequence) +org.apache.commons.io.IOUtils#toInputStream(java.lang.String) +org.apache.commons.io.IOUtils#toString(byte[]) +org.apache.commons.io.IOUtils#toString(java.io.InputStream) +org.apache.commons.io.IOUtils#toString(java.net.URI) +org.apache.commons.io.IOUtils#toString(java.net.URL) +org.apache.commons.io.IOUtils#write(byte[],java.io.Writer) +org.apache.commons.io.IOUtils#write(char[],java.io.OutputStream) +org.apache.commons.io.IOUtils#write(java.lang.CharSequence,java.io.OutputStream) +org.apache.commons.io.IOUtils#write(java.lang.StringBuffer,java.io.OutputStream) +org.apache.commons.io.IOUtils#write(java.lang.String,java.io.OutputStream) +org.apache.commons.io.IOUtils#writeLines(java.util.Collection,java.lang.String,java.io.OutputStream) Index: lucene/tools/forbiddenApis/commons-io.txt =================================================================== --- lucene/tools/forbiddenApis/commons-io.txt (revision 1359191) +++ lucene/tools/forbiddenApis/commons-io.txt (working copy) Property changes on: lucene/tools/forbiddenApis/commons-io.txt 
___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/tools/forbiddenApis/jdk.txt =================================================================== --- lucene/tools/forbiddenApis/jdk.txt (revision 0) +++ lucene/tools/forbiddenApis/jdk.txt (working copy) @@ -0,0 +1,71 @@ +# These methods and classes should not be used by Lucene classes (unsafe, no charset,...): + +java.lang.String#(byte[]) +java.lang.String#(byte[],int) +java.lang.String#(byte[],int,int) +java.lang.String#(byte[],int,int,int) +java.lang.String#getBytes() +java.lang.String#getBytes(int,int,byte[],int) +java.lang.String#toLowerCase() +java.lang.String#toUpperCase() +java.lang.String#format(java.lang.String,java.lang.Object[]) + +java.io.FileReader +java.io.FileWriter +java.io.ByteArrayOutputStream#toString() +java.io.InputStreamReader#(java.io.InputStream) +java.io.OutputStreamWriter#(java.io.OutputStream) +java.io.PrintStream#(java.io.File) +java.io.PrintStream#(java.io.OutputStream) +java.io.PrintStream#(java.io.OutputStream,boolean) +java.io.PrintStream#(java.lang.String) +java.io.PrintWriter#(java.io.File) +java.io.PrintWriter#(java.io.OutputStream) +java.io.PrintWriter#(java.io.OutputStream,boolean) +java.io.PrintWriter#(java.lang.String) +java.io.PrintWriter#format(java.lang.String,java.lang.Object[]) +java.io.PrintWriter#printf(java.lang.String,java.lang.Object[]) + +java.nio.charset.Charset#displayName() + +java.text.BreakIterator#getCharacterInstance() +java.text.BreakIterator#getLineInstance() +java.text.BreakIterator#getSentenceInstance() +java.text.BreakIterator#getWordInstance() +java.text.Collator#getInstance() +java.text.DateFormat#getTimeInstance() +java.text.DateFormat#getTimeInstance(int) +java.text.DateFormat#getDateInstance() +java.text.DateFormat#getDateInstance(int) +java.text.DateFormat#getDateTimeInstance() +java.text.DateFormat#getDateTimeInstance(int,int) 
+java.text.DateFormat#getInstance() +java.text.DateFormatSymbols#() +java.text.DateFormatSymbols#getInstance() +java.text.DecimalFormatSymbols#() +java.text.DecimalFormatSymbols#getInstance() +java.text.MessageFormat#(java.lang.String) +java.text.NumberFormat#getInstance() +java.text.NumberFormat#getNumberInstance() +java.text.NumberFormat#getIntegerInstance() +java.text.NumberFormat#getCurrencyInstance() +java.text.NumberFormat#getPercentInstance() +java.text.SimpleDateFormat#() +java.text.SimpleDateFormat#(java.lang.String) + +java.util.Calendar#() +java.util.Calendar#getInstance() +java.util.Calendar#getInstance(java.util.Locale) +java.util.Calendar#getInstance(java.util.TimeZone) +java.util.GregorianCalendar#() +java.util.GregorianCalendar#(int,int,int) +java.util.GregorianCalendar#(int,int,int,int,int) +java.util.GregorianCalendar#(int,int,int,int,int,int) +java.util.GregorianCalendar#(java.util.Locale) +java.util.GregorianCalendar#(java.util.TimeZone) +java.util.Scanner#(java.io.InputStream) +java.util.Scanner#(java.io.File) +java.util.Scanner#(java.nio.channels.ReadableByteChannel) +java.util.Formatter#(java.lang.String) +java.util.Formatter#(java.io.File) +java.util.Formatter#(java.io.OutputStream) Index: lucene/tools/forbiddenApis/jdk.txt =================================================================== --- lucene/tools/forbiddenApis/jdk.txt (revision 1359191) +++ lucene/tools/forbiddenApis/jdk.txt (working copy) Property changes on: lucene/tools/forbiddenApis/jdk.txt ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/tools/ivy.xml =================================================================== --- lucene/tools/ivy.xml (revision 1359190) +++ lucene/tools/ivy.xml (working copy) @@ -18,4 +18,12 @@ --> + + + + + Index: lucene/tools/lib =================================================================== --- lucene/tools/lib (revision 1359191) +++ 
lucene/tools/lib (working copy) Property changes on: lucene/tools/lib ___________________________________________________________________ Added: svn:ignore ## -0,0 +1 ## +*.jar Index: lucene/tools/lib/asm-debug-all-4.0.jar.sha1 =================================================================== --- lucene/tools/lib/asm-debug-all-4.0.jar.sha1 (revision 0) +++ lucene/tools/lib/asm-debug-all-4.0.jar.sha1 (working copy) @@ -0,0 +1 @@ +2340f4db0d1a57ba3a430597c42875c827a4cb69 Index: lucene/tools/lib/asm-debug-all-LICENSE-BSD_LIKE.txt =================================================================== --- lucene/tools/lib/asm-debug-all-LICENSE-BSD_LIKE.txt (revision 0) +++ lucene/tools/lib/asm-debug-all-LICENSE-BSD_LIKE.txt (working copy) @@ -0,0 +1,29 @@ +Copyright (c) 2000-2011 INRIA, France Telecom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holders nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGE. Index: lucene/tools/lib/asm-debug-all-LICENSE-BSD_LIKE.txt =================================================================== --- lucene/tools/lib/asm-debug-all-LICENSE-BSD_LIKE.txt (revision 1359191) +++ lucene/tools/lib/asm-debug-all-LICENSE-BSD_LIKE.txt (working copy) Property changes on: lucene/tools/lib/asm-debug-all-LICENSE-BSD_LIKE.txt ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/tools/lib/asm-debug-all-NOTICE.txt =================================================================== --- lucene/tools/lib/asm-debug-all-NOTICE.txt (revision 0) +++ lucene/tools/lib/asm-debug-all-NOTICE.txt (working copy) @@ -0,0 +1,2 @@ +ASM - Lightweight Java Bytecode Manipulation Framework +Copyright © 1999-2012, OW2 Consortium Index: lucene/tools/lib/asm-debug-all-NOTICE.txt =================================================================== --- lucene/tools/lib/asm-debug-all-NOTICE.txt (revision 1359191) +++ lucene/tools/lib/asm-debug-all-NOTICE.txt (working copy) Property changes on: lucene/tools/lib/asm-debug-all-NOTICE.txt ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/tools/src/java/lucene-solr.antlib.xml =================================================================== --- lucene/tools/src/java/lucene-solr.antlib.xml (revision 1359190) +++ 
lucene/tools/src/java/lucene-solr.antlib.xml (working copy) @@ -18,4 +18,7 @@ + Index: lucene/tools/src/java/org/apache/lucene/validation/ForbiddenApisCheckTask.java =================================================================== --- lucene/tools/src/java/org/apache/lucene/validation/ForbiddenApisCheckTask.java (revision 0) +++ lucene/tools/src/java/org/apache/lucene/validation/ForbiddenApisCheckTask.java (working copy) @@ -0,0 +1,318 @@ +package org.apache.lucene.validation; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.objectweb.asm.ClassReader; +import org.objectweb.asm.Label; +import org.objectweb.asm.ClassVisitor; +import org.objectweb.asm.MethodVisitor; +import org.objectweb.asm.Opcodes; +import org.objectweb.asm.Type; +import org.objectweb.asm.commons.Method; +import org.objectweb.asm.tree.ClassNode; +import org.objectweb.asm.tree.MethodNode; + +import org.apache.tools.ant.AntClassLoader; +import org.apache.tools.ant.BuildException; +import org.apache.tools.ant.Project; +import org.apache.tools.ant.Task; +import org.apache.tools.ant.types.Path; +import org.apache.tools.ant.types.FileSet; +import org.apache.tools.ant.types.Reference; +import org.apache.tools.ant.types.Resource; +import org.apache.tools.ant.types.ResourceCollection; +import org.apache.tools.ant.types.resources.FileResource; +import org.apache.tools.ant.types.resources.Resources; +import org.apache.tools.ant.types.resources.FileResource; +import org.apache.tools.ant.types.resources.StringResource; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.BufferedReader; +import java.io.Reader; +import java.io.File; +import java.io.StringReader; +import java.util.Arrays; +import java.util.Formatter; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +/** + * Task to check if a set of class files contains calls to forbidden APIs + * from a given classpath and list of API signatures (either inline or as pointer to files). + */ +public class ForbiddenApisCheckTask extends Task { + + private final Resources classFiles = new Resources(); + private final Resources apiSignatures = new Resources(); + private Path classpath = null; + + private final Map classCache = new HashMap(); + private final Map forbiddenMethods = new HashMap(); + private final Map forbiddenClasses = new HashMap(); + + /** Adds the method signature to the list of disallowed methods. 
The Signature is checked against the given ClassLoader. */ + private void addSignature(ClassLoader loader, String signature) throws BuildException { + final int p = signature.indexOf('#'); + final String clazz; + final Method dummy; + if (p >= 0) { + clazz = signature.substring(0, p); + // we ignore the return type, its just to match easier (so return type is void): + dummy = Method.getMethod("void " + signature.substring(p+1), true); + } else { + clazz = signature; + dummy = null; + } + // check class & method signature, if it is really existent (in classpath), but we don't really load the class into JVM: + try { + ClassNode c = classCache.get(clazz); + if (c == null) { + final ClassReader reader; + if (loader != null) { + final InputStream in = loader.getResourceAsStream(clazz.replace('.', '/') + ".class"); + if (in == null) { + throw new BuildException("Loading of class " + clazz + " failed: Not found"); + } + try { + reader = new ClassReader(in); + } finally { + in.close(); + } + } else { + // load from build classpath + reader = new ClassReader(clazz); + } + reader.accept(c = new ClassNode(Opcodes.ASM4), ClassReader.SKIP_CODE | ClassReader.SKIP_DEBUG | ClassReader.SKIP_FRAMES); + classCache.put(clazz, c); + } + if (dummy != null) { + // list all methods with this signature: + boolean found = false; + for (final MethodNode mn : c.methods) { + if (mn.name.equals(dummy.getName()) && Arrays.equals(Type.getArgumentTypes(mn.desc), dummy.getArgumentTypes())) { + found = true; + forbiddenMethods.put(c.name + '\000' + new Method(mn.name, mn.desc), signature); + // don't break when found, as there may be more covariant overrides! 
+ } + } + if (!found) + throw new BuildException("No method found with following signature: " + signature); + } else { + // only add the signature as class name + forbiddenClasses.put(c.name, signature); + } + } catch (IOException e) { + throw new BuildException("Loading of class " + clazz + " failed.", e); + } + } + + /** Parses a class given as Resource and checks for valid method invocations */ + private int checkClass(final Resource res) throws IOException { + final InputStream stream = res.getInputStream(); + try { + final int[] violations = new int[1]; + new ClassReader(stream).accept(new ClassVisitor(Opcodes.ASM4) { + String className = null, source = null; + + @Override + public void visit(int version, int access, String name, String signature, String superName, String[] interfaces) { + // save class name in source code format: + this.className = Type.getObjectType(name).getClassName(); + } + + @Override + public void visitSource(String source, String debug) { + this.source = source; + } + + @Override + public MethodVisitor visitMethod(int access, String name, String desc, String signature, String[] exceptions) { + return new MethodVisitor(Opcodes.ASM4) { + private int lineNo = -1; + + @Override + public void visitMethodInsn(int opcode, String owner, String name, String desc) { + boolean found = false; + String printout = forbiddenClasses.get(owner); + if (printout != null) { + found = true; + log("Forbidden class use: " + printout, Project.MSG_ERR); + } else { + printout = forbiddenMethods.get(owner + '\000' + new Method(name, desc)); + if (printout != null) { + found = true; + log("Forbidden method invocation: " + printout, Project.MSG_ERR); + } + } + if (found) { + violations[0]++; + final StringBuilder sb = new StringBuilder(" in ").append(className); + if (source != null && lineNo >= 0) { + new Formatter(sb, Locale.ROOT).format(" (%s:%d)", source, lineNo).flush(); + } + log(sb.toString(), Project.MSG_ERR); + } + } + + @Override + public void 
visitLineNumber(int lineNo, Label start) { + this.lineNo = lineNo; + } + }; + } + }, ClassReader.SKIP_FRAMES); + return violations[0]; + } finally { + stream.close(); + } + } + + /** Reads a list of API signatures. Closes the Reader when done (on Exception, too)! */ + private void parseApiFile(ClassLoader loader, Reader reader) throws IOException { + final BufferedReader r = new BufferedReader(reader); + try { + String line; + while ((line = r.readLine()) != null) { + line = line.trim(); + if (line.length() == 0 || line.startsWith("#")) + continue; + addSignature(loader, line); + } + } finally { + r.close(); + } + } + + @Override + public void execute() throws BuildException { + AntClassLoader loader = null; + try { + if (classpath != null) { + classpath.setProject(getProject()); + loader = getProject().createClassLoader(classpath); + } + classFiles.setProject(getProject()); + apiSignatures.setProject(getProject()); + + try { + @SuppressWarnings("unchecked") + Iterator iter = (Iterator) apiSignatures.iterator(); + if (!iter.hasNext()) { + throw new BuildException("You need to supply at least one API signature definition through apiFile=, , or inner text."); + } + while (iter.hasNext()) { + final Resource r = iter.next(); + if (!r.isExists()) { + throw new BuildException("Resource does not exist: " + r); + } + if (r instanceof StringResource) { + parseApiFile(loader, new StringReader(((StringResource) r).getValue())); + } else { + parseApiFile(loader, new InputStreamReader(r.getInputStream(), "UTF-8")); + } + } + } catch (IOException ioe) { + throw new BuildException("IO problem while reading files with API signatures.", ioe); + } + if (forbiddenMethods.isEmpty() && forbiddenClasses.isEmpty()) { + throw new BuildException("No API signatures found; use apiFile=, , or inner text to define those!"); + } + + long start = System.currentTimeMillis(); + + int checked = 0; + int errors = 0; + @SuppressWarnings("unchecked") + Iterator iter = (Iterator) classFiles.iterator(); 
+ if (!iter.hasNext()) { + throw new BuildException("There is no given or the fileset does not contain any class files to check."); + } + while (iter.hasNext()) { + final Resource r = iter.next(); + if (!r.isExists()) { + throw new BuildException("Class file does not exist: " + r); + } + + try { + errors += checkClass(r); + } catch (IOException ioe) { + throw new BuildException("IO problem while reading class file " + r, ioe); + } + checked++; + } + + log(String.format(Locale.ROOT, + "Scanned %d class file(s) for forbidden API invocations (in %.2fs), %d error(s).", + checked, (System.currentTimeMillis() - start) / 1000.0, errors), + errors > 0 ? Project.MSG_ERR : Project.MSG_INFO); + + if (errors > 0) { + throw new BuildException("Check for forbidden API calls failed, see log."); + } + } finally { + if (loader != null) loader.cleanup(); + } + } + + /** Set of class files to check */ + public void add(ResourceCollection rc) { + classFiles.add(rc); + } + + /** A file with API signatures apiFile= attribute */ + public void setApiFile(File file) { + apiSignatures.add(new FileResource(getProject(), file)); + } + + /** Set of files with API signatures as nested element */ + public FileSet createApiFileSet() { + final FileSet fs = new FileSet(); + fs.setProject(getProject()); + apiSignatures.add(fs); + return fs; + } + + /** Support for API signatures list as nested text */ + public void addText(String text) { + apiSignatures.add(new StringResource(getProject(), text)); + } + + /** Classpath as classpath= attribute */ + public void setClasspath(Path classpath) { + createClasspath().append(classpath); + } + + /** Classpath as classpathRef= attribute */ + public void setClasspathRef(Reference r) { + createClasspath().setRefid(r); + } + + /** Classpath as nested element */ + public Path createClasspath() { + if (this.classpath == null) { + this.classpath = new Path(getProject()); + } + return this.classpath.createPath(); + } + +} Index: 
lucene/tools/src/java/org/apache/lucene/validation/ForbiddenApisCheckTask.java =================================================================== --- lucene/tools/src/java/org/apache/lucene/validation/ForbiddenApisCheckTask.java (revision 1359191) +++ lucene/tools/src/java/org/apache/lucene/validation/ForbiddenApisCheckTask.java (working copy) Property changes on: lucene/tools/src/java/org/apache/lucene/validation/ForbiddenApisCheckTask.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/tools/src/java/org/apache/lucene/validation/LicenseCheckTask.java =================================================================== --- lucene/tools/src/java/org/apache/lucene/validation/LicenseCheckTask.java (revision 1359190) +++ lucene/tools/src/java/org/apache/lucene/validation/LicenseCheckTask.java (working copy) @@ -140,7 +140,7 @@ checked++; } - log(String.format(Locale.ENGLISH, + log(String.format(Locale.ROOT, "Scanned %d JAR file(s) for licenses (in %.2fs.), %d error(s).", checked, (System.currentTimeMillis() - start) / 1000.0, errors), errors > 0 ? 
Project.MSG_ERR : Project.MSG_INFO); @@ -255,7 +255,7 @@ private static final String createChecksumString(byte[] digest) { StringBuilder checksum = new StringBuilder(); for (int i = 0; i < digest.length; i++) { - checksum.append(String.format(Locale.ENGLISH, "%02x", + checksum.append(String.format(Locale.ROOT, "%02x", CHECKSUM_BYTE_MASK & digest[i])); } return checksum.toString(); Index: solr =================================================================== --- solr (revision 1359190) +++ solr (working copy) Property changes on: solr ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/lucene4199/solr:r1358548-1359191 Index: solr/build.xml =================================================================== --- solr/build.xml (revision 1359190) +++ solr/build.xml (working copy) @@ -130,7 +130,7 @@ + depends="test-core, test-contrib"/> - - + + + + @@ -187,6 +189,21 @@ + + + + + + + + + + + + + + + Index: solr/build.xml =================================================================== --- solr/build.xml (revision 1359190) +++ solr/build.xml (working copy) Property changes on: solr/build.xml ___________________________________________________________________ Added: svn:mergeinfo Merged /lucene/dev/branches/lucene3969/solr/build.xml:r1311219-1324948 Merged 
/lucene/dev/branches/branch_3x/solr/build.xml:r949730,961612,979161,980654,982195,987811,988512,1025544,1026614,1034080,1039151,1050654,1056762,1060014,1060438,1060784,1061035,1063880,1063934,1065324,1070818,1075044,1079376,1079381,1080071,1081052,1083240,1085811,1090423,1091501,1092373,1095519,1098770,1099210,1100407,1100480,1100494,1100515,1101106,1103086,1128215,1128227,1128462,1129419,1129435,1130612,1132581,1132978,1134823,1134917,1136714,1138351,1139430,1139461,1141060,1144653,1148611,1148849,1148864,1150434,1151830,1151947,1152766,1166791,1171384,1173311,1173701,1173745,1177994,1184955,1188805,1189914,1189969,1198337,1199847,1204494,1204519,1204547,1204565,1204826,1207010,1208375,1226235,1229268,1230429,1232954,1234450,1235713,1244960,1302703,1302733,1302749,1302808,1303269,1303733,1304295,1304360,1304660,1304904,1305074,1305574,1305585,1305622,1305681,1305693,1305741,1306929,1310314 Merged /lucene/dev/branches/branch_4x/solr/build.xml:r1349991 Merged /lucene/dev/branches/lucene_solr_3_1/solr/build.xml:r1081856,1083239,1085499,1085511,1085532,1085809,1101103 Merged /lucene/dev/branches/lucene_solr_3_2/solr/build.xml:r1128223,1128247,1129418,1129472 Merged /lucene/solr/branches/newtrunk/solr/build.xml:r924462 Merged /lucene/dev/branches/lucene_solr_3_3/solr/build.xml:r1138390,1138979,1139775 Merged /lucene/java/branches/lucene_3_0/solr/build.xml:r880793,896906 Merged /lucene/java/branches/lucene_2_4/solr/build.xml:r748824 Merged /lucene/dev/branches/bulkpostings/solr/build.xml:r1069647,1069925 Merged /lucene/dev/branches/docvalues/solr/build.xml:r1021634-1134288 Merged /lucene/dev/branches/lucene2621/solr/build.xml:r1188713-1197598,1197605-1199706,1199787-1202835 Merged /lucene/java/branches/lucene_2_9/solr/build.xml:r817269-818600,825998,829134,829881,831036,896850,909334 Merged /lucene/dev/branches/realtime_search/solr/build.xml:r953476-1097796 Merged /lucene/dev/branches/lucene3622/solr/build.xml:r1211241-1213825 Merged 
/lucene/dev/branches/lucene4055/solr/build.xml:r1338960-1343359 Merged /lucene/dev/branches/lucene3606/solr/build.xml:r1209906-1212022 Merged /lucene/dev/branches/lucene3661/solr/build.xml:r1233476-1237242,1237250-1238012 Merged /lucene/dev/branches/solr2452/solr/build.xml:r1087601-1144716 Merged /lucene/java/branches/lucene_2_9_back_compat_tests/solr/build.xml:r818601-821336 Merged /lucene/dev/branches/LUCENE2793/solr/build.xml:r1137551-1144189 Merged /lucene/solr/trunk/build.xml:r922950-923910,923912-925091 Merged /lucene/dev/branches/lucene4199/solr/build.xml:r1358548-1359191 Merged /lucene/dev/branches/lucene2858/solr/build.xml:r1234440-1238051 Merged /lucene/dev/branches/lucene3795_lsp_spatial_module/solr/build.xml:r1291491-1300396 Merged /lucene/dev/branches/preflexfixes/solr/build.xml:r967125-979432 Index: solr/CHANGES.txt =================================================================== --- solr/CHANGES.txt (revision 1359190) +++ solr/CHANGES.txt (working copy) Property changes on: solr/CHANGES.txt ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/lucene4199/solr/CHANGES.txt:r1358548-1359191 Index: solr/contrib =================================================================== --- solr/contrib (revision 1359190) +++ solr/contrib (working copy) Property changes on: solr/contrib ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/lucene4199/solr/contrib:r1358548-1359191 Index: solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/MorfologikFilterFactory.java =================================================================== --- solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/MorfologikFilterFactory.java (revision 1359190) +++ solr/contrib/analysis-extras/src/java/org/apache/solr/analysis/MorfologikFilterFactory.java (working copy) @@ -69,7 +69,7 @@ String dictionaryName = 
args.get(DICTIONARY_SCHEMA_ATTRIBUTE); if (dictionaryName != null && !dictionaryName.isEmpty()) { try { - DICTIONARY dictionary = DICTIONARY.valueOf(dictionaryName.toUpperCase(Locale.ENGLISH)); + DICTIONARY dictionary = DICTIONARY.valueOf(dictionaryName.toUpperCase(Locale.ROOT)); assert dictionary != null; this.dictionary = dictionary; } catch (IllegalArgumentException e) { Index: solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java =================================================================== --- solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java (revision 1359190) +++ solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java (working copy) @@ -171,7 +171,7 @@ InputStream is = part.getInputStream(); String fileName = part.getFileName(); Metadata md = new Metadata(); - md.set(HttpHeaders.CONTENT_TYPE, ctype.getBaseType().toLowerCase(Locale.ENGLISH)); + md.set(HttpHeaders.CONTENT_TYPE, ctype.getBaseType().toLowerCase(Locale.ROOT)); md.set(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName); String content = tika.parseToString(is, md); if (disp != null && disp.equalsIgnoreCase(Part.ATTACHMENT)) { Index: solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImporter.java =================================================================== --- solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImporter.java (revision 1359190) +++ solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImporter.java (working copy) @@ -128,7 +128,7 @@ private void loadSchemaFieldMap() { Map modLnvsf = new HashMap(); for (Map.Entry entry : schema.getFields().entrySet()) { - modLnvsf.put(entry.getKey().toLowerCase(Locale.ENGLISH), entry.getValue()); + modLnvsf.put(entry.getKey().toLowerCase(Locale.ROOT), entry.getValue()); } lowerNameVsSchemaField = 
Collections.unmodifiableMap(modLnvsf); } @@ -139,7 +139,7 @@ schemaField = schema.getFieldOrNull(caseInsensitiveName); } if (schemaField == null) { - schemaField = lowerNameVsSchemaField.get(caseInsensitiveName.toLowerCase(Locale.ENGLISH)); + schemaField = lowerNameVsSchemaField.get(caseInsensitiveName.toLowerCase(Locale.ROOT)); } return schemaField; } Index: solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java =================================================================== --- solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java (revision 1359190) +++ solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java (working copy) @@ -72,7 +72,7 @@ } Object val = row.get(srcCol); - String styleSmall = style.toLowerCase(Locale.ENGLISH); + String styleSmall = style.toLowerCase(Locale.ROOT); if (val instanceof List) { List inputs = (List) val; Index: solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java =================================================================== --- solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java (revision 1359190) +++ solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestCachedSqlEntityProcessor.java (working copy) @@ -108,7 +108,7 @@ if (r == null) break; rows.add(r); - assertEquals(r.get("desc").toString().toUpperCase(Locale.ENGLISH), r.get("desc")); + assertEquals(r.get("desc").toString().toUpperCase(Locale.ROOT), r.get("desc")); } assertEquals(2, rows.size()); assertEquals(2, rows.get(0).size()); @@ -176,7 +176,7 @@ Object val = entry.getValue(); if (val instanceof String) { String s = (String) val; - entry.setValue(s.toUpperCase(Locale.ENGLISH)); + entry.setValue(s.toUpperCase(Locale.ROOT)); } } return row; Index: 
solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java =================================================================== --- solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java (revision 1359190) +++ solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java (working copy) @@ -138,7 +138,7 @@ String streamType = req.getParams().get(ExtractingParams.STREAM_TYPE, null); if (streamType != null) { //Cache? Parsers are lightweight to construct and thread-safe, so I'm told - MediaType mt = MediaType.parse(streamType.trim().toLowerCase(Locale.ENGLISH)); + MediaType mt = MediaType.parse(streamType.trim().toLowerCase(Locale.ROOT)); parser = new DefaultParser(config.getMediaTypeRegistry()).getParsers().get(mt); } else { parser = autoDetectParser; Index: solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/RegexRulesPasswordProvider.java =================================================================== --- solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/RegexRulesPasswordProvider.java (revision 1359190) +++ solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/RegexRulesPasswordProvider.java (working copy) @@ -20,12 +20,12 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import java.util.LinkedHashMap; import java.util.Map.Entry; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; +import org.apache.lucene.util.IOUtils; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaMetadataKeys; import org.apache.tika.parser.PasswordProvider; @@ -73,7 +73,7 @@ */ public static LinkedHashMap parseRulesFile(InputStream is) { LinkedHashMap rules = new LinkedHashMap(); - BufferedReader br = new BufferedReader(new InputStreamReader(is)); + BufferedReader br = new 
BufferedReader(IOUtils.getDecodingReader(is, IOUtils.CHARSET_UTF_8)); String line; try { int linenum = 0; Index: solr/core =================================================================== --- solr/core (revision 1359190) +++ solr/core (working copy) Property changes on: solr/core ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/lucene4199/solr/core:r1358548-1359191 Index: solr/core/src/java/org/apache/solr/analysis/JapaneseTokenizerFactory.java =================================================================== --- solr/core/src/java/org/apache/solr/analysis/JapaneseTokenizerFactory.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/analysis/JapaneseTokenizerFactory.java (working copy) @@ -93,7 +93,7 @@ private Mode getMode(Map args) { String mode = args.get(MODE); if (mode != null) { - return Mode.valueOf(mode.toUpperCase(Locale.ENGLISH)); + return Mode.valueOf(mode.toUpperCase(Locale.ROOT)); } else { return JapaneseTokenizer.DEFAULT_MODE; } Index: solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java =================================================================== --- solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java (working copy) @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.io.Reader; +import java.nio.charset.Charset; import java.util.HashMap; import java.util.Set; @@ -71,7 +72,7 @@ public static void main(String[] args) throws IOException { Reader in = new LegacyHTMLStripCharFilter( - CharReader.get(new InputStreamReader(System.in))); + CharReader.get(new InputStreamReader(System.in, Charset.defaultCharset()))); int ch; while ( (ch=in.read()) != -1 ) System.out.print((char)ch); } Index: solr/core/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java 
=================================================================== --- solr/core/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java (working copy) @@ -68,12 +68,12 @@ private static final Map> registry = new HashMap>(6); static { - registry.put("DoubleMetaphone".toUpperCase(Locale.ENGLISH), DoubleMetaphone.class); - registry.put("Metaphone".toUpperCase(Locale.ENGLISH), Metaphone.class); - registry.put("Soundex".toUpperCase(Locale.ENGLISH), Soundex.class); - registry.put("RefinedSoundex".toUpperCase(Locale.ENGLISH), RefinedSoundex.class); - registry.put("Caverphone".toUpperCase(Locale.ENGLISH), Caverphone2.class); - registry.put("ColognePhonetic".toUpperCase(Locale.ENGLISH), ColognePhonetic.class); + registry.put("DoubleMetaphone".toUpperCase(Locale.ROOT), DoubleMetaphone.class); + registry.put("Metaphone".toUpperCase(Locale.ROOT), Metaphone.class); + registry.put("Soundex".toUpperCase(Locale.ROOT), Soundex.class); + registry.put("RefinedSoundex".toUpperCase(Locale.ROOT), RefinedSoundex.class); + registry.put("Caverphone".toUpperCase(Locale.ROOT), Caverphone2.class); + registry.put("ColognePhonetic".toUpperCase(Locale.ROOT), ColognePhonetic.class); } protected boolean inject = true; @@ -93,7 +93,7 @@ throw new InitializationException("Missing required parameter: " + ENCODER + " [" + registry.keySet() + "]"); } - clazz = registry.get(name.toUpperCase(Locale.ENGLISH)); + clazz = registry.get(name.toUpperCase(Locale.ROOT)); if( clazz == null ) { clazz = resolveEncoder(name); } Index: solr/core/src/java/org/apache/solr/cloud/SolrZkServer.java =================================================================== --- solr/core/src/java/org/apache/solr/cloud/SolrZkServer.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/cloud/SolrZkServer.java (working copy) @@ -20,8 +20,8 @@ import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; 
-import java.io.FileReader; import java.io.IOException; +import java.io.InputStreamReader; import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.UnknownHostException; @@ -29,6 +29,7 @@ import java.util.Properties; import java.util.Map.Entry; +import org.apache.lucene.util.IOUtils; import org.apache.solr.common.SolrException; import org.apache.zookeeper.server.ServerConfig; import org.apache.zookeeper.server.ZooKeeperServerMain; @@ -462,7 +463,7 @@ + " file is missing"); } - BufferedReader br = new BufferedReader(new FileReader(myIdFile)); + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(myIdFile), IOUtils.CHARSET_UTF_8)); String myIdString; try { myIdString = br.readLine(); Index: solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java =================================================================== --- solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java (working copy) @@ -198,7 +198,7 @@ log.warn("No lockType configured for " + dir + " assuming 'simple'"); rawLockType = "simple"; } - final String lockType = rawLockType.toLowerCase(Locale.ENGLISH).trim(); + final String lockType = rawLockType.toLowerCase(Locale.ROOT).trim(); if ("simple".equals(lockType)) { // multiple SimpleFSLockFactory instances should be OK Index: solr/core/src/java/org/apache/solr/core/CoreContainer.java =================================================================== --- solr/core/src/java/org/apache/solr/core/CoreContainer.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/core/CoreContainer.java (working copy) @@ -781,7 +781,7 @@ if (schemaFile.exists()) { String key = schemaFile.getAbsolutePath() + ":" - + new SimpleDateFormat("yyyyMMddHHmmss", Locale.US).format(new Date( + + new SimpleDateFormat("yyyyMMddHHmmss", Locale.ROOT).format(new Date( schemaFile.lastModified())); schema = 
indexSchemaCache.get(key); if (schema == null) { Index: solr/core/src/java/org/apache/solr/core/JmxMonitoredMap.java =================================================================== --- solr/core/src/java/org/apache/solr/core/JmxMonitoredMap.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/core/JmxMonitoredMap.java (working copy) @@ -281,7 +281,7 @@ } else if (staticStats.contains(attribute) && attribute != null && attribute.length() > 0) { try { - String getter = "get" + attribute.substring(0, 1).toUpperCase(Locale.ENGLISH) + String getter = "get" + attribute.substring(0, 1).toUpperCase(Locale.ROOT) + attribute.substring(1); Method meth = infoBean.getClass().getMethod(getter); val = meth.invoke(infoBean); Index: solr/core/src/java/org/apache/solr/core/SolrConfig.java =================================================================== --- solr/core/src/java/org/apache/solr/core/SolrConfig.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/core/SolrConfig.java (working copy) @@ -344,7 +344,7 @@ /** Input must not be null */ public static LastModFrom parse(final String s) { try { - return valueOf(s.toUpperCase(Locale.ENGLISH)); + return valueOf(s.toUpperCase(Locale.ROOT)); } catch (Exception e) { log.warn( "Unrecognized value for lastModFrom: " + s, e); return BOGUS; Index: solr/core/src/java/org/apache/solr/core/SolrCore.java =================================================================== --- solr/core/src/java/org/apache/solr/core/SolrCore.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/core/SolrCore.java (working copy) @@ -1913,13 +1913,13 @@ Set hide = new HashSet(); for (String file : solrConfig.getResourceLoader().listConfigDir()) { - hide.add(file.toUpperCase(Locale.ENGLISH)); + hide.add(file.toUpperCase(Locale.ROOT)); } // except the "gettable" list StringTokenizer st = new StringTokenizer( gettable ); while( st.hasMoreTokens() ) { - hide.remove( st.nextToken().toUpperCase(Locale.ENGLISH) ); + hide.remove( 
st.nextToken().toUpperCase(Locale.ROOT) ); } for( String s : hide ) { invariants.add( ShowFileRequestHandler.HIDDEN, s ); Index: solr/core/src/java/org/apache/solr/core/SolrDeletionPolicy.java =================================================================== --- solr/core/src/java/org/apache/solr/core/SolrDeletionPolicy.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/core/SolrDeletionPolicy.java (working copy) @@ -145,7 +145,7 @@ try { if (maxCommitAge != null) { if (maxCommitAgeTimeStamp==-1) { - DateMathParser dmp = new DateMathParser(DateField.UTC, Locale.US); + DateMathParser dmp = new DateMathParser(DateField.UTC, Locale.ROOT); maxCommitAgeTimeStamp = dmp.parseMath(maxCommitAge).getTime(); } if (IndexDeletionPolicyWrapper.getCommitTimestamp(commit) < maxCommitAgeTimeStamp) { Index: solr/core/src/java/org/apache/solr/handler/admin/ShowFileRequestHandler.java =================================================================== --- solr/core/src/java/org/apache/solr/handler/admin/ShowFileRequestHandler.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/handler/admin/ShowFileRequestHandler.java (working copy) @@ -101,7 +101,7 @@ String[] hidden = invariants.getParams( HIDDEN ); if( hidden != null ) { for( String s : hidden ) { - hiddenFiles.add( s.toUpperCase(Locale.ENGLISH) ); + hiddenFiles.add( s.toUpperCase(Locale.ROOT) ); } } } @@ -138,7 +138,7 @@ adminFile = confPath; } else { fname = fname.replace('\\', '/'); // normalize slashes - if (hiddenFiles.contains(fname.toUpperCase(Locale.ENGLISH))) { + if (hiddenFiles.contains(fname.toUpperCase(Locale.ROOT))) { throw new SolrException(ErrorCode.FORBIDDEN, "Can not access: " + fname); } if (fname.indexOf("..") >= 0) { @@ -159,7 +159,7 @@ NamedList> files = new SimpleOrderedMap>(); for (String f : children) { - if (hiddenFiles.contains(f.toUpperCase(Locale.ENGLISH))) { + if (hiddenFiles.contains(f.toUpperCase(Locale.ROOT))) { continue; // don't show 'hidden' files } if (f.startsWith(".")) 
{ @@ -214,7 +214,7 @@ } else { fname = fname.replace( '\\', '/' ); // normalize slashes - if( hiddenFiles.contains( fname.toUpperCase(Locale.ENGLISH) ) ) { + if( hiddenFiles.contains( fname.toUpperCase(Locale.ROOT) ) ) { throw new SolrException( ErrorCode.FORBIDDEN, "Can not access: "+fname ); } if( fname.indexOf( ".." ) >= 0 ) { @@ -241,7 +241,7 @@ for( File f : adminFile.listFiles() ) { String path = f.getAbsolutePath().substring( basePath ); path = path.replace( '\\', '/' ); // normalize slashes - if( hiddenFiles.contains( path.toUpperCase(Locale.ENGLISH) ) ) { + if( hiddenFiles.contains( path.toUpperCase(Locale.ROOT) ) ) { continue; // don't show 'hidden' files } if( f.isHidden() || f.getName().startsWith( "." ) ) { Index: solr/core/src/java/org/apache/solr/handler/admin/SolrInfoMBeanHandler.java =================================================================== --- solr/core/src/java/org/apache/solr/handler/admin/SolrInfoMBeanHandler.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/handler/admin/SolrInfoMBeanHandler.java (working copy) @@ -255,9 +255,9 @@ .append(ref).append(", Now: ").append(now); if(ref instanceof Number) { - NumberFormat nf = NumberFormat.getIntegerInstance(Locale.ENGLISH); + NumberFormat nf = NumberFormat.getIntegerInstance(Locale.ROOT); if((ref instanceof Double) || (ref instanceof Float)) { - nf = NumberFormat.getInstance(Locale.ENGLISH); + nf = NumberFormat.getInstance(Locale.ROOT); } double dref = ((Number)ref).doubleValue(); double dnow = ((Number)now).doubleValue(); Index: solr/core/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java =================================================================== --- solr/core/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java (working copy) @@ -19,12 +19,14 @@ import java.io.DataInputStream; import java.io.File; +import java.io.InputStreamReader; import 
java.lang.management.ManagementFactory; import java.lang.management.OperatingSystemMXBean; import java.lang.management.RuntimeMXBean; import java.lang.reflect.Method; import java.net.InetAddress; import java.net.UnknownHostException; +import java.nio.charset.Charset; import java.text.DecimalFormat; import java.util.Date; import java.util.Locale; @@ -131,7 +133,7 @@ addGetterIfAvaliable( os, "maxFileDescriptorCount", info ); try { - if( !os.getName().toLowerCase(Locale.ENGLISH).startsWith( "windows" ) ) { + if( !os.getName().toLowerCase(Locale.ROOT).startsWith( "windows" ) ) { // Try some command line things info.add( "uname", execute( "uname -a" ) ); info.add( "uptime", execute( "uptime" ) ); @@ -180,7 +182,7 @@ process = Runtime.getRuntime().exec(cmd); in = new DataInputStream( process.getInputStream() ); // use default charset from locale here, because the command invoked also uses the default locale: - return IOUtils.toString(in); + return IOUtils.toString(new InputStreamReader(in, Charset.defaultCharset())); } catch( Exception ex ) { // ignore - log.warn("Error executing command", ex); Index: solr/core/src/java/org/apache/solr/handler/admin/ThreadDumpHandler.java =================================================================== --- solr/core/src/java/org/apache/solr/handler/admin/ThreadDumpHandler.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/handler/admin/ThreadDumpHandler.java (working copy) @@ -21,6 +21,7 @@ import java.lang.management.ManagementFactory; import java.lang.management.ThreadInfo; import java.lang.management.ThreadMXBean; +import java.util.Locale; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; @@ -119,7 +120,7 @@ } private static String formatNanos(long ns) { - return String.format("%.4fms", ns / (double) 1000000); + return String.format(Locale.ROOT, "%.4fms", ns / (double) 1000000); } //////////////////////// SolrInfoMBeans methods ////////////////////// Index: 
solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java =================================================================== --- solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java (working copy) @@ -180,7 +180,7 @@ try { responseFormat = Grouping.Format.valueOf(formatStr); } catch (IllegalArgumentException e) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, String.format("Illegal %s parameter", GroupParams.GROUP_FORMAT)); + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, String.format(Locale.ROOT, "Illegal %s parameter", GroupParams.GROUP_FORMAT)); } groupingSpec.setResponseFormat(responseFormat); @@ -386,7 +386,7 @@ if (grouping.isSignalCacheWarning()) { rsp.add( "cacheWarning", - String.format("Cache limit of %d percent relative to maxdoc has exceeded. Please increase cache size or disable caching.", maxDocsPercentageToCache) + String.format(Locale.ROOT, "Cache limit of %d percent relative to maxdoc has exceeded. 
Please increase cache size or disable caching.", maxDocsPercentageToCache) ); } rb.setResult(result); Index: solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java =================================================================== --- solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java (working copy) @@ -242,7 +242,7 @@ int flags = 0; for (String flagParam : flagParams) { try { - flags |= TermsParams.TermsRegexpFlag.valueOf(flagParam.toUpperCase(Locale.ENGLISH)).getValue(); + flags |= TermsParams.TermsRegexpFlag.valueOf(flagParam.toUpperCase(Locale.ROOT)).getValue(); } catch (IllegalArgumentException iae) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown terms regex flag '" + flagParam + "'"); } Index: solr/core/src/java/org/apache/solr/handler/PingRequestHandler.java =================================================================== --- solr/core/src/java/org/apache/solr/handler/PingRequestHandler.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/handler/PingRequestHandler.java (working copy) @@ -191,7 +191,7 @@ } else { try { - action = ACTIONS.valueOf(actionParam.toUpperCase(Locale.ENGLISH)); + action = ACTIONS.valueOf(actionParam.toUpperCase(Locale.ROOT)); } catch (IllegalArgumentException iae){ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, Index: solr/core/src/java/org/apache/solr/handler/SnapPuller.java =================================================================== --- solr/core/src/java/org/apache/solr/handler/SnapPuller.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/handler/SnapPuller.java (working copy) @@ -530,7 +530,7 @@ * All the files are copied to a temp dir first */ private File createTempindexDir(SolrCore core) { - String tmpIdxDirName = "index." 
+ new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.US).format(new Date()); + String tmpIdxDirName = "index." + new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date()); File tmpIdxDir = new File(core.getDataDir(), tmpIdxDirName); tmpIdxDir.mkdirs(); return tmpIdxDir; @@ -713,7 +713,7 @@ } private String getDateAsStr(Date d) { - return new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.US).format(d); + return new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(d); } /** Index: solr/core/src/java/org/apache/solr/handler/SnapShooter.java =================================================================== --- solr/core/src/java/org/apache/solr/handler/SnapShooter.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/handler/SnapShooter.java (working copy) @@ -91,7 +91,7 @@ if(numberToKeep streams ) throws Exception { - String method = req.getMethod().toUpperCase(Locale.ENGLISH); + String method = req.getMethod().toUpperCase(Locale.ROOT); if( "GET".equals( method ) || "HEAD".equals( method )) { return new ServletSolrParams(req); } @@ -390,7 +390,7 @@ if( idx > 0 ) { // remove the charset definition "; charset=utf-8" contentType = contentType.substring( 0, idx ); } - if( "application/x-www-form-urlencoded".equals( contentType.toLowerCase(Locale.ENGLISH) ) ) { + if( "application/x-www-form-urlencoded".equals( contentType.toLowerCase(Locale.ROOT) ) ) { return new ServletSolrParams(req); // just get the params from parameterMap } if( ServletFileUpload.isMultipartContent(req) ) { Index: solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java =================================================================== --- solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java (working copy) @@ -22,6 +22,7 @@ import java.util.Collections; import java.util.Iterator; import java.util.List; +import java.util.Locale; 
import java.util.regex.Pattern; import org.apache.lucene.analysis.Token; @@ -126,7 +127,7 @@ wbsp = new WordBreakSpellChecker(); String bstb = strParam(config, PARAM_BREAK_SUGGESTION_TIE_BREAKER); if (bstb != null) { - bstb = bstb.toUpperCase(); + bstb = bstb.toUpperCase(Locale.ROOT); if (bstb.equals(BreakSuggestionTieBreaker.SUM_FREQ.name())) { sortMethod = BreakSuggestionSortMethod.NUM_CHANGES_THEN_SUMMED_FREQUENCY; } else if (bstb.equals(BreakSuggestionTieBreaker.MAX_FREQ.name())) { Index: solr/core/src/java/org/apache/solr/update/processor/URLClassifyProcessor.java =================================================================== --- solr/core/src/java/org/apache/solr/update/processor/URLClassifyProcessor.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/update/processor/URLClassifyProcessor.java (working copy) @@ -21,6 +21,8 @@ import java.net.URI; import java.net.URISyntaxException; import java.net.URL; +import java.util.Locale; + import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.SolrParams; import org.apache.solr.request.SolrQueryRequest; @@ -213,7 +215,7 @@ } private String landingPageSuffix(URL url) { - String path = url.getPath().toLowerCase(); + String path = url.getPath().toLowerCase(Locale.ROOT); for(String suffix : landingPageSuffixes) { if(path.endsWith(suffix)) { return suffix; @@ -223,6 +225,6 @@ } private String getPathWithoutSuffix(URL url) { - return url.getPath().toLowerCase().replaceFirst(landingPageSuffix(url)+"$", ""); + return url.getPath().toLowerCase(Locale.ROOT).replaceFirst(landingPageSuffix(url)+"$", ""); } } Index: solr/core/src/java/org/apache/solr/update/processor/UUIDUpdateProcessorFactory.java =================================================================== --- solr/core/src/java/org/apache/solr/update/processor/UUIDUpdateProcessorFactory.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/update/processor/UUIDUpdateProcessorFactory.java (working copy) @@ -59,7 +59,7 
@@ UpdateRequestProcessor next ) { return new DefaultValueUpdateProcessor(fieldName, next) { public Object getDefaultValue() { - return UUID.randomUUID().toString().toLowerCase(Locale.ENGLISH); + return UUID.randomUUID().toString().toLowerCase(Locale.ROOT); } }; } Index: solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java =================================================================== --- solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java (working copy) @@ -75,7 +75,7 @@ File parent = f.getParentFile(); if (parent != null) parent.mkdirs(); FileOutputStream fos = new FileOutputStream(f, true); - return new PrintStreamInfoStream(new PrintStream(fos, true)); + return new PrintStreamInfoStream(new PrintStream(fos, true, "UTF-8")); } else { return InfoStream.NO_OUTPUT; } Index: solr/core/src/java/org/apache/solr/update/UpdateLog.java =================================================================== --- solr/core/src/java/org/apache/solr/update/UpdateLog.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/update/UpdateLog.java (working copy) @@ -756,7 +756,7 @@ private void ensureLog() { if (tlog == null) { - String newLogName = String.format(Locale.ENGLISH, LOG_FILENAME_PATTERN, TLOG_NAME, id); + String newLogName = String.format(Locale.ROOT, LOG_FILENAME_PATTERN, TLOG_NAME, id); tlog = new TransactionLog(new File(tlogDir, newLogName), globalStrings); } } Index: solr/core/src/java/org/apache/solr/util/DateMathParser.java =================================================================== --- solr/core/src/java/org/apache/solr/util/DateMathParser.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/util/DateMathParser.java (working copy) @@ -83,8 +83,8 @@ /** Default TimeZone for DateMath rounding (UTC) */ public static final TimeZone DEFAULT_MATH_TZ = UTC; - /** Default Locale for DateMath rounding (Locale.US) */ - public static final 
Locale DEFAULT_MATH_LOCALE = Locale.US; + /** Default Locale for DateMath rounding (Locale.ROOT) */ + public static final Locale DEFAULT_MATH_LOCALE = Locale.ROOT; /** * A mapping from (uppercased) String labels idenyifying time units, Index: solr/core/src/java/org/apache/solr/util/NumberUtils.java =================================================================== --- solr/core/src/java/org/apache/solr/util/NumberUtils.java (revision 1359190) +++ solr/core/src/java/org/apache/solr/util/NumberUtils.java (working copy) @@ -18,6 +18,7 @@ package org.apache.solr.util; import java.text.NumberFormat; +import java.util.Locale; import org.apache.lucene.util.BytesRef; @@ -27,7 +28,7 @@ public class NumberUtils { public static String readableSize(long size) { - NumberFormat formatter = NumberFormat.getNumberInstance(); + NumberFormat formatter = NumberFormat.getNumberInstance(Locale.ROOT); formatter.setMaximumFractionDigits(2); if (size / (1024 * 1024 * 1024) > 0) { return formatter.format(size * 1.0d / (1024 * 1024 * 1024)) + " GB"; Index: solr/core/src/test/org/apache/solr/cloud/ZkTestServer.java =================================================================== --- solr/core/src/test/org/apache/solr/cloud/ZkTestServer.java (revision 1359190) +++ solr/core/src/test/org/apache/solr/cloud/ZkTestServer.java (working copy) @@ -304,7 +304,7 @@ reader = new BufferedReader( - new InputStreamReader(sock.getInputStream())); + new InputStreamReader(sock.getInputStream(), "US-ASCII")); StringBuilder sb = new StringBuilder(); String line; while((line = reader.readLine()) != null) { Index: solr/core/src/test/org/apache/solr/core/TestConfig.java =================================================================== --- solr/core/src/test/org/apache/solr/core/TestConfig.java (revision 1359190) +++ solr/core/src/test/org/apache/solr/core/TestConfig.java (working copy) @@ -109,7 +109,7 @@ //System.out.println( handler.getHiddenFiles() ); // should not contain: solrconfig.xml scheam.xml 
admin-extra.html - assertFalse(handler.getHiddenFiles().contains("scheam.xml".toUpperCase(Locale.ENGLISH))); + assertFalse(handler.getHiddenFiles().contains("scheam.xml".toUpperCase(Locale.ROOT))); assertTrue(handler.getHiddenFiles().contains("PROTWORDS.TXT")); } Index: solr/core/src/test/org/apache/solr/core/TestCoreContainer.java =================================================================== --- solr/core/src/test/org/apache/solr/core/TestCoreContainer.java (revision 1359190) +++ solr/core/src/test/org/apache/solr/core/TestCoreContainer.java (working copy) @@ -19,14 +19,16 @@ import java.io.BufferedWriter; import java.io.File; -import java.io.FileWriter; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.OutputStreamWriter; import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.XPathExpressionException; import org.apache.commons.io.FileUtils; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.util.IOUtils; import org.apache.solr.SolrTestCaseJ4; import org.junit.BeforeClass; import org.junit.Test; @@ -172,7 +174,7 @@ assertTrue("Failed to mkdirs workDir", solrHomeDirectory.mkdirs()); try { File solrXmlFile = new File(solrHomeDirectory, "solr.xml"); - BufferedWriter out = new BufferedWriter(new FileWriter(solrXmlFile)); + BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(solrXmlFile), IOUtils.CHARSET_UTF_8)); out.write(EMPTY_SOLR_XML); out.close(); } catch (IOException e) { Index: solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java =================================================================== --- solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java (revision 1359190) +++ solr/core/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java (working copy) @@ -19,6 +19,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.BytesRef; +import 
org.apache.lucene.util.IOUtils; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.MapSolrParams; @@ -34,6 +35,7 @@ import java.io.File; import java.io.FileOutputStream; +import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.util.HashMap; import java.util.Map; @@ -412,7 +414,7 @@ // write a test file to boost some docs private void writeFile(File file, String query, String... ids) throws Exception { - PrintWriter out = new PrintWriter(new FileOutputStream(file)); + PrintWriter out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), IOUtils.CHARSET_UTF_8)); out.println(""); out.println(""); out.println(""); Index: solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java =================================================================== --- solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java (revision 1359190) +++ solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java (working copy) @@ -174,7 +174,7 @@ public void testFieldStatisticsResultsDateField() throws Exception { SolrCore core = h.getCore(); - DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US); + DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ROOT); dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); String date1 = dateFormat.format(new Date(123456789)) + "Z"; Index: solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java =================================================================== --- solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java (revision 1359190) +++ solr/core/src/test/org/apache/solr/handler/TestReplicationHandler.java (working copy) @@ -18,11 +18,13 @@ import java.io.BufferedReader; import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; +import java.io.FileInputStream; +import 
java.io.FileOutputStream; import java.io.FilenameFilter; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; import java.io.Writer; import java.net.URL; import java.util.regex.Matcher; @@ -929,8 +931,8 @@ * character copy of file using UTF-8. If port is non-null, will be substituted any time "TEST_PORT" is found. */ private static void copyFile(File src, File dst, Integer port) throws IOException { - BufferedReader in = new BufferedReader(new FileReader(src)); - Writer out = new FileWriter(dst); + BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(src), "UTF-8")); + Writer out = new OutputStreamWriter(new FileOutputStream(dst), "UTF-8"); for (String line = in.readLine(); null != line; line = in.readLine()) { Index: solr/core/src/test/org/apache/solr/internal/csv/writer/CSVConfigGuesserTest.java =================================================================== --- solr/core/src/test/org/apache/solr/internal/csv/writer/CSVConfigGuesserTest.java (revision 1359190) +++ solr/core/src/test/org/apache/solr/internal/csv/writer/CSVConfigGuesserTest.java (working copy) @@ -43,7 +43,7 @@ * 1234 ; abcd ; 1234 ; * */ - public void testConfigGuess1() { + public void testConfigGuess1() throws Exception { CSVConfig expected = new CSVConfig(); expected.setDelimiter(';'); expected.setValueDelimiter(' '); @@ -57,7 +57,7 @@ StringBuffer sb = new StringBuffer(); sb.append("1234;abcd;1234\n"); sb.append("abcd;1234;abcd"); - ByteArrayInputStream in = new ByteArrayInputStream(sb.toString().getBytes()); + ByteArrayInputStream in = new ByteArrayInputStream(sb.toString().getBytes("UTF-8")); CSVConfigGuesser guesser = new CSVConfigGuesser(in); CSVConfig guessed = guesser.guess(); assertEquals(expected.isFixedWidth(), guessed.isFixedWidth()); @@ -70,7 +70,7 @@ * 1,2,3,4 * */ - public void testConfigGuess2() { + public void testConfigGuess2() throws Exception { CSVConfig expected = new 
CSVConfig(); expected.setDelimiter(';'); expected.setValueDelimiter(' '); @@ -80,7 +80,7 @@ StringBuffer sb = new StringBuffer(); sb.append("1,2,3,4\n"); sb.append("abcd,1234,abcd,1234"); - ByteArrayInputStream in = new ByteArrayInputStream(sb.toString().getBytes()); + ByteArrayInputStream in = new ByteArrayInputStream(sb.toString().getBytes("UTF-8")); CSVConfigGuesser guesser = new CSVConfigGuesser(in); CSVConfig guessed = guesser.guess(); assertEquals(expected.isFixedWidth(), guessed.isFixedWidth()); Index: solr/core/src/test/org/apache/solr/request/TestBinaryResponseWriter.java =================================================================== --- solr/core/src/test/org/apache/solr/request/TestBinaryResponseWriter.java (revision 1359190) +++ solr/core/src/test/org/apache/solr/request/TestBinaryResponseWriter.java (working copy) @@ -55,7 +55,7 @@ * Tests known types implementation by asserting correct encoding/decoding of UUIDField */ public void testUUID() throws Exception { - String s = UUID.randomUUID().toString().toLowerCase(Locale.ENGLISH); + String s = UUID.randomUUID().toString().toLowerCase(Locale.ROOT); assertU(adoc("id", "101", "uuid", s)); assertU(commit()); LocalSolrQueryRequest req = lrf.makeRequest("q", "*:*"); Index: solr/core/src/test/org/apache/solr/request/TestFaceting.java =================================================================== --- solr/core/src/test/org/apache/solr/request/TestFaceting.java (revision 1359190) +++ solr/core/src/test/org/apache/solr/request/TestFaceting.java (working copy) @@ -52,7 +52,7 @@ } String t(int tnum) { - return String.format(Locale.US, "%08d", tnum); + return String.format(Locale.ROOT, "%08d", tnum); } void createIndex(int nTerms) { Index: solr/core/src/test/org/apache/solr/request/TestRemoteStreaming.java =================================================================== --- solr/core/src/test/org/apache/solr/request/TestRemoteStreaming.java (revision 1359190) +++ 
solr/core/src/test/org/apache/solr/request/TestRemoteStreaming.java (working copy) @@ -33,6 +33,7 @@ import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.net.URL; @@ -83,7 +84,7 @@ InputStream inputStream = (InputStream) obj; try { StringWriter strWriter = new StringWriter(); - IOUtils.copy(inputStream,strWriter); + IOUtils.copy(new InputStreamReader(inputStream, "UTF-8"),strWriter); return strWriter.toString(); } finally { IOUtils.closeQuietly(inputStream); Index: solr/core/src/test/org/apache/solr/schema/DateFieldTest.java =================================================================== --- solr/core/src/test/org/apache/solr/schema/DateFieldTest.java (revision 1359190) +++ solr/core/src/test/org/apache/solr/schema/DateFieldTest.java (working copy) @@ -31,7 +31,7 @@ public class DateFieldTest extends LuceneTestCase { public static TimeZone UTC = TimeZone.getTimeZone("UTC"); protected DateField f = null; - protected DateMathParser p = new DateMathParser(UTC, Locale.US); + protected DateMathParser p = new DateMathParser(UTC, Locale.ROOT); @Override public void setUp() throws Exception { Index: solr/core/src/test/org/apache/solr/search/TestLFUCache.java =================================================================== --- solr/core/src/test/org/apache/solr/search/TestLFUCache.java (revision 1359190) +++ solr/core/src/test/org/apache/solr/search/TestLFUCache.java (working copy) @@ -26,6 +26,7 @@ import java.io.IOException; import java.util.HashMap; +import java.util.Locale; import java.util.Map; @@ -112,7 +113,7 @@ private void assertCache(LFUCache cache, int... 
gets) { for (int idx : gets) { if (cache.get(idx) == null) { - log.error(String.format("Expected entry %d not in cache", idx)); + log.error(String.format(Locale.ROOT, "Expected entry %d not in cache", idx)); assertTrue(false); } } @@ -120,7 +121,7 @@ private void assertNotCache(LFUCache cache, int... gets) { for (int idx : gets) { if (cache.get(idx) != null) { - log.error(String.format("Unexpected entry %d in cache", idx)); + log.error(String.format(Locale.ROOT, "Unexpected entry %d in cache", idx)); assertTrue(false); } } Index: solr/core/src/test/org/apache/solr/search/TestRecovery.java =================================================================== --- solr/core/src/test/org/apache/solr/search/TestRecovery.java (revision 1359190) +++ solr/core/src/test/org/apache/solr/search/TestRecovery.java (working copy) @@ -906,7 +906,7 @@ // WARNING... assumes format of .00000n where n is less than 9 long logNumber = Long.parseLong(fname.substring(fname.lastIndexOf(".") + 1)); - String fname2 = String.format(Locale.ENGLISH, + String fname2 = String.format(Locale.ROOT, UpdateLog.LOG_FILENAME_PATTERN, UpdateLog.TLOG_NAME, logNumber + 1); Index: solr/core/src/test/org/apache/solr/search/TestSearchPerf.java =================================================================== --- solr/core/src/test/org/apache/solr/search/TestSearchPerf.java (revision 1359190) +++ solr/core/src/test/org/apache/solr/search/TestSearchPerf.java (working copy) @@ -50,7 +50,7 @@ } String t(int tnum) { - return String.format("%08d", tnum); + return String.format(Locale.ROOT, "%08d", tnum); } Random r = new Random(0); // specific seed for reproducible perf testing Index: solr/core/src/test/org/apache/solr/TestTrie.java =================================================================== --- solr/core/src/test/org/apache/solr/TestTrie.java (revision 1359190) +++ solr/core/src/test/org/apache/solr/TestTrie.java (working copy) @@ -169,11 +169,11 @@ assertQ("Range filter tint:[* to *] must match 10 
documents", req("q", "*:*", "fq", "tdate:[* TO *]"), "//*[@numFound='10']"); // Test date math syntax - SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US); + SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT); format.setTimeZone(TimeZone.getTimeZone("UTC")); assertU(delQ("*:*")); - DateMathParser dmp = new DateMathParser(DateField.UTC, Locale.US); + DateMathParser dmp = new DateMathParser(DateField.UTC, Locale.ROOT); String largestDate = ""; for (int i = 0; i < 10; i++) { // index 10 days starting with today @@ -220,9 +220,9 @@ checkPrecisionSteps("tdate"); // For tdate tests - SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US); + SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT); format.setTimeZone(TimeZone.getTimeZone("UTC")); - DateMathParser dmp = new DateMathParser(DateField.UTC, Locale.US); + DateMathParser dmp = new DateMathParser(DateField.UTC, Locale.ROOT); for (int i = 0; i < 10; i++) { long l = Integer.MAX_VALUE + i*1L; Index: solr/core/src/test/org/apache/solr/util/DateMathParserTest.java =================================================================== --- solr/core/src/test/org/apache/solr/util/DateMathParserTest.java (revision 1359190) +++ solr/core/src/test/org/apache/solr/util/DateMathParserTest.java (working copy) @@ -55,17 +55,17 @@ public DateMathParserTest() { super(); fmt = new SimpleDateFormat - ("G yyyyy MM ww WW DD dd F E aa HH hh mm ss SSS z Z",Locale.US); + ("G yyyyy MM ww WW DD dd F E aa HH hh mm ss SSS z Z",Locale.ROOT); fmt.setTimeZone(UTC); - parser = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS",Locale.US); + parser = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS",Locale.ROOT); parser.setTimeZone(UTC); } /** MACRO: Round: parses s, rounds with u, fmts */ protected String r(String s, String u) throws Exception { Date d = parser.parse(s); - Calendar c = Calendar.getInstance(UTC, 
Locale.US); + Calendar c = Calendar.getInstance(UTC, Locale.ROOT); c.setTime(d); DateMathParser.round(c, u); return fmt.format(c.getTime()); @@ -74,7 +74,7 @@ /** MACRO: Add: parses s, adds v u, fmts */ protected String a(String s, int v, String u) throws Exception { Date d = parser.parse(s); - Calendar c = Calendar.getInstance(UTC, Locale.US); + Calendar c = Calendar.getInstance(UTC, Locale.ROOT); c.setTime(d); DateMathParser.add(c, v, u); return fmt.format(c.getTime()); @@ -162,7 +162,7 @@ public void testParseStatelessness() throws Exception { - DateMathParser p = new DateMathParser(UTC, Locale.US); + DateMathParser p = new DateMathParser(UTC, Locale.ROOT); p.setNow(parser.parse("2001-07-04T12:08:56.235")); String e = fmt.format(p.parseMath("")); @@ -179,7 +179,7 @@ public void testParseMath() throws Exception { - DateMathParser p = new DateMathParser(UTC, Locale.US); + DateMathParser p = new DateMathParser(UTC, Locale.ROOT); p.setNow(parser.parse("2001-07-04T12:08:56.235")); // No-Op @@ -272,7 +272,7 @@ // US, Positive Offset with DST TimeZone tz = TimeZone.getTimeZone(PLUS_TZS); - DateMathParser p = new DateMathParser(tz, Locale.US); + DateMathParser p = new DateMathParser(tz, Locale.ROOT); p.setNow(parser.parse("2001-07-04T12:08:56.235")); @@ -294,7 +294,7 @@ // France, Negative Offset with DST tz = TimeZone.getTimeZone(NEG_TZS); - p = new DateMathParser(tz, Locale.US); + p = new DateMathParser(tz, Locale.ROOT); p.setNow(parser.parse("2001-07-04T12:08:56.235")); assertMath("2001-07-04T12:08:56.000", p, "/SECOND"); @@ -311,7 +311,7 @@ public void testParseMathExceptions() throws Exception { - DateMathParser p = new DateMathParser(UTC, Locale.US); + DateMathParser p = new DateMathParser(UTC, Locale.ROOT); p.setNow(parser.parse("2001-07-04T12:08:56.235")); Map badCommands = new HashMap(); Index: solr/core/src/test/org/apache/solr/util/TimeZoneUtilsTest.java =================================================================== --- 
solr/core/src/test/org/apache/solr/util/TimeZoneUtilsTest.java (revision 1359190) +++ solr/core/src/test/org/apache/solr/util/TimeZoneUtilsTest.java (working copy) @@ -120,10 +120,10 @@ int hour = _TestUtil.nextInt(r, 0, 23); int min = _TestUtil.nextInt(r, 0, 59); - String hours = String.format(Locale.US, + String hours = String.format(Locale.ROOT, (r.nextBoolean() ? ONE_DIGIT : TWO_DIGIT), hour); - String mins = String.format(Locale.US, TWO_DIGIT, min); + String mins = String.format(Locale.ROOT, TWO_DIGIT, min); String input = "GMT" + (r.nextBoolean()?"+":"-") + hours + (r.nextBoolean() ? "" : ((r.nextBoolean()?":":"") + mins)); assertSameRules(input, Index: solr/example =================================================================== --- solr/example (revision 1359190) +++ solr/example (working copy) Property changes on: solr/example ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/lucene4199/solr/example:r1358548-1359191 Index: solr/solrj =================================================================== --- solr/solrj (revision 1359190) +++ solr/solrj (working copy) Property changes on: solr/solrj ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/lucene4199/solr/solrj:r1358548-1359191 Index: solr/solrj/src/java/org/apache/solr/client/solrj/beans/DocumentObjectBinder.java =================================================================== --- solr/solrj/src/java/org/apache/solr/client/solrj/beans/DocumentObjectBinder.java (revision 1359190) +++ solr/solrj/src/java/org/apache/solr/client/solrj/beans/DocumentObjectBinder.java (working copy) @@ -178,7 +178,7 @@ } else { String setterName = setter.getName(); if (setterName.startsWith("set") && setterName.length() > 3) { - name = setterName.substring(3, 4).toLowerCase(Locale.ENGLISH) + setterName.substring(4); + name = setterName.substring(3, 4).toLowerCase(Locale.ROOT) + 
setterName.substring(4); } else { name = setter.getName(); } Index: solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrServer.java =================================================================== --- solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrServer.java (revision 1359190) +++ solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrServer.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.OutputStream; import java.util.LinkedList; +import java.util.Locale; import java.util.Queue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.CountDownLatch; @@ -158,7 +159,7 @@ fmt = ""; } if (fmt != null) { - byte[] content = String.format( + byte[] content = String.format(Locale.ROOT, fmt, params.getBool(UpdateParams.WAIT_SEARCHER, false) + "").getBytes("UTF-8"); Index: solr/solrj/src/java/org/apache/solr/client/solrj/impl/XMLResponseParser.java =================================================================== --- solr/solrj/src/java/org/apache/solr/client/solrj/impl/XMLResponseParser.java (revision 1359190) +++ solr/solrj/src/java/org/apache/solr/client/solrj/impl/XMLResponseParser.java (working copy) @@ -190,7 +190,7 @@ { if( v != null ) { try { - return KnownType.valueOf( v.toUpperCase(Locale.ENGLISH) ); + return KnownType.valueOf( v.toUpperCase(Locale.ROOT) ); } catch( Exception ex ) {} } @@ -270,7 +270,7 @@ if( XMLStreamConstants.START_ELEMENT != parser.getEventType() ) { throw new RuntimeException( "must be start element, not: "+parser.getEventType() ); } - if( !"arr".equals( parser.getLocalName().toLowerCase(Locale.ENGLISH) ) ) { + if( !"arr".equals( parser.getLocalName().toLowerCase(Locale.ROOT) ) ) { throw new RuntimeException( "must be 'arr', not: "+parser.getLocalName() ); } @@ -373,7 +373,7 @@ if( XMLStreamConstants.START_ELEMENT != parser.getEventType() ) { throw new RuntimeException( "must be start element, not: "+parser.getEventType() 
); } - if( !"doc".equals( parser.getLocalName().toLowerCase(Locale.ENGLISH) ) ) { + if( !"doc".equals( parser.getLocalName().toLowerCase(Locale.ROOT) ) ) { throw new RuntimeException( "must be 'lst', not: "+parser.getLocalName() ); } Index: solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java =================================================================== --- solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java (revision 1359190) +++ solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java (working copy) @@ -26,6 +26,7 @@ import org.apache.solr.common.util.DateUtil; import java.util.Date; +import java.util.Locale; import java.util.regex.Pattern; @@ -240,9 +241,9 @@ */ public SolrQuery addNumericRangeFacet(String field, Number start, Number end, Number gap) { add(FacetParams.FACET_RANGE, field); - add(String.format("f.%s.%s", field, FacetParams.FACET_RANGE_START), start.toString()); - add(String.format("f.%s.%s", field, FacetParams.FACET_RANGE_END), end.toString()); - add(String.format("f.%s.%s", field, FacetParams.FACET_RANGE_GAP), gap.toString()); + add(String.format(Locale.ROOT, "f.%s.%s", field, FacetParams.FACET_RANGE_START), start.toString()); + add(String.format(Locale.ROOT, "f.%s.%s", field, FacetParams.FACET_RANGE_END), end.toString()); + add(String.format(Locale.ROOT, "f.%s.%s", field, FacetParams.FACET_RANGE_GAP), gap.toString()); this.set(FacetParams.FACET, true); return this; } @@ -258,9 +259,9 @@ */ public SolrQuery addDateRangeFacet(String field, Date start, Date end, String gap) { add(FacetParams.FACET_RANGE, field); - add(String.format("f.%s.%s", field, FacetParams.FACET_RANGE_START), DateUtil.getThreadLocalDateFormat().format(start)); - add(String.format("f.%s.%s", field, FacetParams.FACET_RANGE_END), DateUtil.getThreadLocalDateFormat().format(end)); - add(String.format("f.%s.%s", field, FacetParams.FACET_RANGE_GAP), gap); + add(String.format(Locale.ROOT, "f.%s.%s", field, FacetParams.FACET_RANGE_START), 
DateUtil.getThreadLocalDateFormat().format(start)); + add(String.format(Locale.ROOT, "f.%s.%s", field, FacetParams.FACET_RANGE_END), DateUtil.getThreadLocalDateFormat().format(end)); + add(String.format(Locale.ROOT, "f.%s.%s", field, FacetParams.FACET_RANGE_GAP), gap); this.set(FacetParams.FACET, true); return this; } Index: solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java =================================================================== --- solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java (revision 1359190) +++ solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java (working copy) @@ -34,7 +34,7 @@ { if( p != null ) { try { - return CollectionAction.valueOf( p.toUpperCase(Locale.ENGLISH) ); + return CollectionAction.valueOf( p.toUpperCase(Locale.ROOT) ); } catch( Exception ex ) {} } Index: solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java =================================================================== --- solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java (revision 1359190) +++ solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java (working copy) @@ -154,7 +154,7 @@ public static EchoParamStyle get( String v ) { if( v != null ) { - v = v.toUpperCase(Locale.ENGLISH); + v = v.toUpperCase(Locale.ROOT); if( v.equals( "EXPLICIT" ) ) { return EXPLICIT; } Index: solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java =================================================================== --- solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java (revision 1359190) +++ solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java (working copy) @@ -99,7 +99,7 @@ { if( p != null ) { try { - return CoreAdminAction.valueOf( p.toUpperCase(Locale.ENGLISH) ); + return CoreAdminAction.valueOf( p.toUpperCase(Locale.ROOT) ); } catch( Exception ex ) {} } Index: solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java 
=================================================================== --- solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java (revision 1359190) +++ solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java (working copy) @@ -264,10 +264,10 @@ public enum FacetRangeOther { BEFORE, AFTER, BETWEEN, ALL, NONE; @Override - public String toString() { return super.toString().toLowerCase(); } + public String toString() { return super.toString().toLowerCase(Locale.ROOT); } public static FacetRangeOther get(String label) { try { - return valueOf(label.toUpperCase()); + return valueOf(label.toUpperCase(Locale.ROOT)); } catch (IllegalArgumentException e) { throw new SolrException (SolrException.ErrorCode.BAD_REQUEST, @@ -283,10 +283,10 @@ public enum FacetDateOther { BEFORE, AFTER, BETWEEN, ALL, NONE; @Override - public String toString() { return super.toString().toLowerCase(); } + public String toString() { return super.toString().toLowerCase(Locale.ROOT); } public static FacetDateOther get(String label) { try { - return valueOf(label.toUpperCase()); + return valueOf(label.toUpperCase(Locale.ROOT)); } catch (IllegalArgumentException e) { throw new SolrException (SolrException.ErrorCode.BAD_REQUEST, @@ -317,10 +317,10 @@ public enum FacetRangeInclude { ALL, LOWER, UPPER, EDGE, OUTER; @Override - public String toString() { return super.toString().toLowerCase(Locale.ENGLISH); } + public String toString() { return super.toString().toLowerCase(Locale.ROOT); } public static FacetRangeInclude get(String label) { try { - return valueOf(label.toUpperCase(Locale.ENGLISH)); + return valueOf(label.toUpperCase(Locale.ROOT)); } catch (IllegalArgumentException e) { throw new SolrException (SolrException.ErrorCode.BAD_REQUEST, Index: solr/solrj/src/java/org/apache/solr/common/params/MoreLikeThisParams.java =================================================================== --- solr/solrj/src/java/org/apache/solr/common/params/MoreLikeThisParams.java (revision 1359190) 
+++ solr/solrj/src/java/org/apache/solr/common/params/MoreLikeThisParams.java (working copy) @@ -59,7 +59,7 @@ public static TermStyle get( String p ) { if( p != null ) { - p = p.toUpperCase(Locale.ENGLISH); + p = p.toUpperCase(Locale.ROOT); if( p.equals( "DETAILS" ) ) { return DETAILS; } Index: solr/solrj/src/java/org/apache/solr/common/util/ContentStreamBase.java =================================================================== --- solr/solrj/src/java/org/apache/solr/common/util/ContentStreamBase.java (revision 1359190) +++ solr/solrj/src/java/org/apache/solr/common/util/ContentStreamBase.java (working copy) @@ -20,7 +20,6 @@ import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; -import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -30,7 +29,9 @@ import java.net.URLConnection; import java.util.Locale; +import org.apache.lucene.util.IOUtils; + /** * Three concrete implementations for ContentStream - one for File/URL/String * @@ -52,7 +53,7 @@ public static String getCharsetFromContentType( String contentType ) { if( contentType != null ) { - int idx = contentType.toLowerCase(Locale.ENGLISH).indexOf( "charset=" ); + int idx = contentType.toLowerCase(Locale.ROOT).indexOf( "charset=" ); if( idx > 0 ) { return contentType.substring( idx + "charset=".length() ).trim(); } @@ -126,13 +127,13 @@ /** * If an charset is defined (by the contentType) use that, otherwise - * use a file reader + * use a UTF-8 reader */ @Override public Reader getReader() throws IOException { String charset = getCharsetFromContentType( contentType ); return charset == null - ? new FileReader( file ) + ? 
new InputStreamReader(getStream(), IOUtils.CHARSET_UTF_8) : new InputStreamReader( getStream(), charset ); } } Index: solr/solrj/src/java/org/apache/solr/common/util/DateUtil.java =================================================================== --- solr/solrj/src/java/org/apache/solr/common/util/DateUtil.java (revision 1359190) +++ solr/solrj/src/java/org/apache/solr/common/util/DateUtil.java (working copy) @@ -57,7 +57,7 @@ private static final Date DEFAULT_TWO_DIGIT_YEAR_START; static { - Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.US); + Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT); calendar.set(2000, Calendar.JANUARY, 1, 0, 0); DEFAULT_TWO_DIGIT_YEAR_START = calendar.getTime(); } @@ -147,7 +147,7 @@ while (formatIter.hasNext()) { String format = (String) formatIter.next(); if (dateParser == null) { - dateParser = new SimpleDateFormat(format, Locale.US); + dateParser = new SimpleDateFormat(format, Locale.ROOT); dateParser.setTimeZone(GMT); dateParser.set2DigitYearStart(startDate); } else { @@ -184,7 +184,7 @@ public ThreadLocalDateFormat() { super(); //2007-04-26T08:05:04Z - SimpleDateFormat tmp = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.US); + SimpleDateFormat tmp = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ROOT); tmp.setTimeZone(UTC); proto = tmp; } @@ -202,7 +202,7 @@ // builder's buffer. StringBuilder sb = out instanceof StringBuilder ? 
(StringBuilder)out : new StringBuilder(); - if (cal==null) cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.US); + if (cal==null) cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT); cal.setTime(date); int i = cal.get(Calendar.YEAR); Index: solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java =================================================================== --- solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java (revision 1359190) +++ solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java (working copy) @@ -209,7 +209,7 @@ public static List toLower(List strings) { ArrayList ret = new ArrayList(strings.size()); for (String str : strings) { - ret.add(str.toLowerCase(Locale.ENGLISH)); + ret.add(str.toLowerCase(Locale.ROOT)); } return ret; } Index: solr/solrj/src/test/org/apache/solr/client/solrj/embedded/TestSolrProperties.java =================================================================== --- solr/solrj/src/test/org/apache/solr/client/solrj/embedded/TestSolrProperties.java (revision 1359190) +++ solr/solrj/src/test/org/apache/solr/client/solrj/embedded/TestSolrProperties.java (working copy) @@ -19,6 +19,7 @@ import java.io.File; import java.io.FileInputStream; +import java.io.InputStreamReader; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; @@ -169,7 +170,7 @@ Document document = builder.parse(fis); fis.close(); fis = new FileInputStream(new File(tempDir, SOLR_PERSIST_XML)); - String solrPersistXml = IOUtils.toString(fis); + String solrPersistXml = IOUtils.toString(new InputStreamReader(fis, "UTF-8")); //System.out.println("xml:" + solrPersistXml); assertTrue("\"/solr/cores[@defaultCoreName='core0']\" doesn't match in:\n" + solrPersistXml, exists("/solr/cores[@defaultCoreName='core0']", document)); Index: solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java =================================================================== --- 
solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java (revision 1359190) +++ solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java (working copy) @@ -26,6 +26,7 @@ import java.util.Calendar; import java.util.Date; import java.util.Locale; +import java.util.TimeZone; /** * @@ -139,7 +140,7 @@ public void testFacetDateRange() { SolrQuery q = new SolrQuery("dog"); - Calendar calendar = Calendar.getInstance(Locale.UK); + Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.UK); calendar.set(2010, 1, 1); Date start = calendar.getTime(); calendar.set(2011, 1, 1); Index: solr/solrj/src/test/org/apache/solr/common/util/ContentStreamTest.java =================================================================== --- solr/solrj/src/test/org/apache/solr/common/util/ContentStreamTest.java (revision 1359190) +++ solr/solrj/src/test/org/apache/solr/common/util/ContentStreamTest.java (working copy) @@ -21,9 +21,9 @@ import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; -import java.io.FileReader; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; import java.io.StringReader; import java.net.ConnectException; import java.net.HttpURLConnection; @@ -60,7 +60,7 @@ ContentStreamBase stream = new ContentStreamBase.FileStream( file ); assertEquals( file.length(), stream.getSize().intValue() ); assertTrue( IOUtils.contentEquals( new FileInputStream( file ), stream.getStream() ) ); - assertTrue( IOUtils.contentEquals( new FileReader( file ), stream.getReader() ) ); + assertTrue( IOUtils.contentEquals( new InputStreamReader(new FileInputStream(file), "UTF-8"), stream.getReader() ) ); } Index: solr/test-framework =================================================================== --- solr/test-framework (revision 1359190) +++ solr/test-framework (working copy) Property changes on: solr/test-framework ___________________________________________________________________ 
Modified: svn:mergeinfo Merged /lucene/dev/branches/lucene4199/solr/test-framework:r1358548-1359191