Index: ivy/libraries.properties
===================================================================
--- ivy/libraries.properties	(revision 898020)
+++ ivy/libraries.properties	(working copy)
@@ -26,7 +26,7 @@
 zookeeper.version=3.2.2
 thrift.version=r771587
-lucene.version=2.2.0
+lucene.version=3.0.0
 jsr311.version=1.1.1
Index: src/java/org/apache/hadoop/hbase/mapreduce/IndexConfiguration.java
===================================================================
--- src/java/org/apache/hadoop/hbase/mapreduce/IndexConfiguration.java	(revision 898020)
+++ src/java/org/apache/hadoop/hbase/mapreduce/IndexConfiguration.java	(working copy)
@@ -37,6 +37,7 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
@@ -53,7 +54,15 @@
   static final String HBASE_COLUMN_NAME = "hbase.column.name";
   static final String HBASE_COLUMN_STORE = "hbase.column.store";
   static final String HBASE_COLUMN_INDEX = "hbase.column.index";
+
+  /**
+   * The "tokenize" terminology is deprecated in Lucene, replaced by "analyze".
+   * @see #HBASE_COLUMN_ANALYZE
+   * @deprecated
+   */
   static final String HBASE_COLUMN_TOKENIZE = "hbase.column.tokenize";
+  static final String HBASE_COLUMN_ANALYZE = "hbase.column.analyze";
+
   static final String HBASE_COLUMN_BOOST = "hbase.column.boost";
   static final String HBASE_COLUMN_OMIT_NORMS = "hbase.column.omit.norms";
   static final String HBASE_INDEX_ROWKEY_NAME = "hbase.index.rowkey.name";
@@ -131,14 +140,34 @@
     getColumn(columnName).setBoolean(HBASE_COLUMN_STORE, store);
   }
 
+  /**
+   * @deprecated
+   * @see #isAnalyze(String)
+   * @param columnName the column name
+   * @return true if the column value is analyzed
+   */
   public boolean isTokenize(String columnName) {
     return getColumn(columnName).getBoolean(HBASE_COLUMN_TOKENIZE, true);
   }
 
+  /**
+   * @deprecated
+   * @see #setAnalyze(String, boolean)
+   * @param columnName the column name
+   * @param tokenize whether the column value should be tokenized
+   */
   public void setTokenize(String columnName, boolean tokenize) {
     getColumn(columnName).setBoolean(HBASE_COLUMN_TOKENIZE, tokenize);
   }
 
+  public boolean isAnalyze(String columnName) {
+    return getColumn(columnName).getBoolean(HBASE_COLUMN_ANALYZE, true);
+  }
+
+  public void setAnalyze(String columnName, boolean analyze) {
+    getColumn(columnName).setBoolean(HBASE_COLUMN_ANALYZE, analyze);
+  }
+
   public float getBoost(String columnName) {
     return getColumn(columnName).getFloat(HBASE_COLUMN_BOOST, 1.0f);
   }
@@ -166,7 +195,7 @@
 
   public String getAnalyzerName() {
     return get(HBASE_INDEX_ANALYZER_NAME,
-        "org.apache.lucene.analysis.standard.StandardAnalyzer");
+        StandardAnalyzer.class.getName());
   }
 
   public void setAnalyzerName(String analyzerName) {
Index: src/java/org/apache/hadoop/hbase/mapreduce/IndexOutputFormat.java
===================================================================
--- src/java/org/apache/hadoop/hbase/mapreduce/IndexOutputFormat.java	(revision 898020)
+++ src/java/org/apache/hadoop/hbase/mapreduce/IndexOutputFormat.java	(working copy)
@@ -19,6 +19,7 @@
  */
 package org.apache.hadoop.hbase.mapreduce;
 
+import java.io.File;
 import java.io.IOException;
 import java.util.Random;
 
@@ -32,7 +33,9 @@
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
 import org.apache.lucene.search.Similarity;
+import org.apache.lucene.store.FSDirectory;
 
 /**
  * Create a local index, unwrap Lucene documents created by reduce, add them to
@@ -87,8 +90,8 @@
     }
 
     // build locally first
-    final IndexWriter writer = new IndexWriter(fs.startLocalOutput(perm, temp)
-        .toString(), analyzer, true);
+    final IndexWriter writer = new IndexWriter(FSDirectory.open(new File(fs.startLocalOutput(perm, temp)
+        .toString())), analyzer, true, MaxFieldLength.LIMITED);
 
     // no delete, so no need for maxBufferedDeleteTerms
     writer.setMaxBufferedDocs(indexConf.getMaxBufferedDocs());
@@ -98,11 +101,10 @@
     String similarityName = indexConf.getSimilarityName();
     if (similarityName != null) {
       try {
-        Class similarityClass = Class.forName(similarityName);
-        Similarity similarity = (Similarity) similarityClass.newInstance();
+        Similarity similarity = Class.forName(similarityName).asSubclass(Similarity.class).newInstance();
         writer.setSimilarity(similarity);
       } catch (Exception e) {
-        throw new IOException("Error in creating a similarty object "
+        throw new IOException("Error in creating a similarity object "
             + similarityName);
       }
     }
Index: src/java/org/apache/hadoop/hbase/mapreduce/IndexTableReducer.java
===================================================================
--- src/java/org/apache/hadoop/hbase/mapreduce/IndexTableReducer.java	(revision 898020)
+++ src/java/org/apache/hadoop/hbase/mapreduce/IndexTableReducer.java	(working copy)
@@ -68,7 +68,7 @@
       // index and store row key, row key already UTF-8 encoded
       Field keyField = new Field(indexConf.getRowkeyName(),
         Bytes.toString(key.get(), key.getOffset(), key.getLength()),
-        Field.Store.YES, Field.Index.UN_TOKENIZED);
+        Field.Store.YES, Field.Index.NOT_ANALYZED);
       keyField.setOmitNorms(true);
       doc.add(keyField);
     }
@@ -82,7 +82,7 @@
           Field.Store.YES: Field.Store.NO;
         Field.Index index = indexConf.isIndex(column)?
           (indexConf.isTokenize(column)?
-            Field.Index.TOKENIZED: Field.Index.UN_TOKENIZED):
+            Field.Index.ANALYZED: Field.Index.NOT_ANALYZED):
             Field.Index.NO;
 
         // UTF-8 encode value
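
Note on the upgrade (not part of the patch): Lucene 3.0 removed the IndexWriter(String, Analyzer, boolean) constructor and the Field.Index.TOKENIZED/UN_TOKENIZED constants that 2.2.0 still provided, which is what forces the changes above. The standalone sketch below shows the 3.0-era pattern the patch adopts; the class name, index path, and field names are hypothetical placeholders, and StandardAnalyzer/Version come from the lucene-core 3.0.0 jar the ivy change pulls in.

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class Lucene30WriterSketch {
  public static void main(String[] args) throws IOException {
    // Lucene 3.0: a Directory (not a path string) and an explicit
    // MaxFieldLength are required by the IndexWriter constructor.
    IndexWriter writer = new IndexWriter(
        FSDirectory.open(new File("/tmp/hypothetical-index")), // placeholder path
        new StandardAnalyzer(Version.LUCENE_30),
        true, // create, overwriting any existing index
        MaxFieldLength.LIMITED);

    Document doc = new Document();
    // NOT_ANALYZED/ANALYZED replace the removed UN_TOKENIZED/TOKENIZED.
    doc.add(new Field("rowkey", "row-0001",
        Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("content", "some cell value",
        Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();
  }
}

MaxFieldLength.LIMITED keeps the 10,000-term truncation the removed constructor applied implicitly, so the patch preserves existing indexing behavior; UNLIMITED would be a behavior change for very large cell values.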