Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1100748) +++ lucene/CHANGES.txt (working copy) @@ -484,6 +484,13 @@ returns NumericField instances. (Uwe Schindler, Ryan McKinley, Mike McCandless) +New features + +* LUCENE-3082: Added index upgrade tool + org.apache.lucene.index.IndexFormatUpgrader that allows upgrading + all segments to the most recent supported index format without fully + optimizing. (Uwe Schindler, Mike McCandless) + Optimizations * LUCENE-2990: ArrayUtil/CollectionUtil.*Sort() methods now exit early Index: lucene/src/java/org/apache/lucene/index/IndexFormatUpgrader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexFormatUpgrader.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/IndexFormatUpgrader.java (revision 0) @@ -0,0 +1,119 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.Constants; +import org.apache.lucene.util.Version; + +import java.io.File; +import java.io.IOException; +import java.io.PrintStream; +import java.util.Collection; + +/** + * This is an easy-to-use tool that upgrades all segments of an index from previous Lucene versions + * to the current segment file format. It can be used from command line: + *
+  *  java -cp lucene-core.jar org.apache.lucene.index.IndexFormatUpgrader [-delete-prior-commits] [-verbose] indexDir
+  * 
+ * Alternatively this class can be instantiated and {@link #upgrade} invoked. It uses {@link UpgradeIndexMergePolicy} + * and triggers the upgrade via an optimize request to {@link IndexWriter}. + * For safety reasons, the tool refuses by default to upgrade indexes with prior commit points ({@link IndexCommit}), + * unless explicitly instructed to only keep the last commit. + */ +public final class IndexFormatUpgrader { + + private static void printUsage() { + System.err.println("Upgrades an index so all segments created with a previous Lucene version are rewritten."); + System.err.println("Usage: java " + IndexFormatUpgrader.class.getName() + " [-delete-prior-commits] [-verbose] indexDir"); + System.exit(1); + } + + public static void main(String[] args) throws IOException { + String dir = null; + boolean deletePriorCommits = false; + PrintStream out = null; + for (String arg : args) { + if ("-delete-prior-commits".equals(arg)) { + deletePriorCommits = true; + } else if ("-verbose".equals(arg)) { + out = System.out; + } else if (dir == null) { + dir = arg; + } else { + printUsage(); + } + } + if (dir == null) { + printUsage(); + } + + new IndexFormatUpgrader(FSDirectory.open(new File(dir)), out, deletePriorCommits).upgrade(); // open the parsed indexDir; args[0] may be an option flag such as -verbose + } + + private final Directory dir; + private final PrintStream infoStream; + private final IndexWriterConfig iwc; + private final boolean deletePriorCommits; + + @SuppressWarnings("deprecation") + public IndexFormatUpgrader(Directory dir) { + this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), null, false); + } + + @SuppressWarnings("deprecation") + public IndexFormatUpgrader(Directory dir, PrintStream infoStream, boolean deletePriorCommits) { + this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), infoStream, deletePriorCommits); + } + + public IndexFormatUpgrader(Directory dir, IndexWriterConfig iwc, PrintStream infoStream, boolean deletePriorCommits) { + this.dir = dir; + this.iwc = iwc; + this.infoStream = 
infoStream; + this.deletePriorCommits = deletePriorCommits; + } + + public void upgrade() throws IOException { + if (!IndexReader.indexExists(dir)) { + throw new IndexNotFoundException(dir.toString()); + } + + if (!deletePriorCommits) { + final Collection commits = IndexReader.listCommits(dir); + if (commits.size() > 1) + throw new IllegalArgumentException("This tool was invoked to not delete prior commit points, but the following commits were found: " + commits); + } + + final IndexWriterConfig c = (IndexWriterConfig) iwc.clone(); + c.setMergePolicy(new UpgradeIndexMergePolicy(c.getMergePolicy())); + c.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); + + final IndexWriter w = new IndexWriter(dir, c); + try { + w.setInfoStream(infoStream); + w.message("Upgrading all pre-" + Constants.LUCENE_MAIN_VERSION + " segments of index directory '" + dir + "' to version " + Constants.LUCENE_MAIN_VERSION + "..."); + w.optimize(); + w.message("All segments upgraded to version " + Constants.LUCENE_MAIN_VERSION); + } finally { + w.close(); + } + } + +} Property changes on: lucene\src\java\org\apache\lucene\index\IndexFormatUpgrader.java ___________________________________________________________________ Added: svn:keywords + Date Author Id Revision HeadURL Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java =================================================================== --- lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java (revision 0) @@ -0,0 +1,149 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.Constants; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +/** This {@link MergePolicy} is used for upgrading all existing segments of + * an index when calling {@link IndexWriter#optimize()}. + * All other methods delegate to the base {@code MergePolicy} given to the constructor. + * This allows for an as-cheap-as-possible upgrade of an older index by only upgrading segments that + * were created by previous Lucene versions. Optimize no longer really optimizes; + * it is just used to "optimize" older segment versions away. + *

In general one would use {@link IndexFormatUpgrader}, but for a fully customizable upgrade, + * you can use this like any other {@code MergePolicy} and call {@link IndexWriter#optimize()}: + *

+  *  IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_XX, new KeywordAnalyzer());
+  *  iwc.setMergePolicy(new UpgradeIndexMergePolicy(iwc.getMergePolicy()));
+  *  IndexWriter w = new IndexWriter(dir, iwc);
+  *  w.optimize();
+  *  w.close();
+  * 
+ * @see IndexFormatUpgrader + */ +public class UpgradeIndexMergePolicy extends MergePolicy { + + protected final MergePolicy base; + + /** Wrap the given {@link MergePolicy} and intercept optimize requests to + * only upgrade segments written with previous Lucene versions. */ + public UpgradeIndexMergePolicy(MergePolicy base) { + this.base = base; + } + + /** Returns if the given segment should be upgraded. The default implementation + * will return {@code !Constants.LUCENE_MAIN_VERSION.equals(si.getVersion())}, + * so all segments created with a different version number than this Lucene version will + * get upgraded. + */ + protected boolean shouldUpgradeSegment(SegmentInfo si) { + return !Constants.LUCENE_MAIN_VERSION.equals(si.getVersion()); + } + + @Override + public void setIndexWriter(IndexWriter writer) { + super.setIndexWriter(writer); + base.setIndexWriter(writer); + } + + @Override + public MergeSpecification findMerges(SegmentInfos segmentInfos) throws CorruptIndexException, IOException { + return base.findMerges(segmentInfos); + } + + @Override + public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount, Set segmentsToOptimize) throws CorruptIndexException, IOException { + // first find all old segments + final HashSet oldSegments = new HashSet(); + for (final SegmentInfo si : segmentInfos) { + if (segmentsToOptimize.contains(si) && shouldUpgradeSegment(si)) { + oldSegments.add(si); + } + } + + if (verbose()) message("findMergesForOptimize: segmentsToUpgrade=" + oldSegments); + + if (oldSegments.isEmpty()) + return null; + + MergeSpecification spec = base.findMergesForOptimize(segmentInfos, maxSegmentCount, oldSegments); + + if (spec != null) { + // remove all segments that are in merge specification from oldSegments, + // the resulting set contains all segments that are left over + // and will be merged to one additional segment: + for (final OneMerge om : spec.merges) { + oldSegments.removeAll(om.segments); + } + 
} + + if (!oldSegments.isEmpty()) { + if (verbose()) + message("findMergesForOptimize: " + base.getClass().getSimpleName() + + " does not want to merge all old segments, merge remaining ones into new segment: " + oldSegments); + final SegmentInfos newInfos = new SegmentInfos(); + for (final SegmentInfo si : segmentInfos) { + if (oldSegments.contains(si)) { + newInfos.add(si); + } + } + // add the final merge + if (spec == null) { + spec = new MergeSpecification(); + } + spec.add(new OneMerge(newInfos)); + } + + return spec; + } + + @Override + public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos) throws CorruptIndexException, IOException { + return base.findMergesToExpungeDeletes(segmentInfos); + } + + @Override + public boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment) throws IOException { + return base.useCompoundFile(segments, newSegment); + } + + @Override + public void close() { + base.close(); + } + + @Override + public String toString() { + return "[" + getClass().getSimpleName() + "->" + base + "]"; + } + + private boolean verbose() { + IndexWriter w = writer.get(); + return w != null && w.verbose(); + } + + private void message(String message) { + if (verbose()) + writer.get().message("UPGMP: " + message); + } + +} Property changes on: lucene\src\java\org\apache\lucene\index\UpgradeIndexMergePolicy.java ___________________________________________________________________ Added: svn:keywords + Date Author Id Revision HeadURL Added: svn:eol-style + native Index: lucene/src/test/org/apache/lucene/index/index.31.optimized.cfs.zip =================================================================== Cannot display: file marked as a binary type. 
svn:mime-type = application/octet-stream Property changes on: lucene\src\test\org\apache\lucene\index\index.31.optimized.cfs.zip ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Index: lucene/src/test/org/apache/lucene/index/index.31.optimized.nocfs.zip =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Property changes on: lucene\src\test\org\apache\lucene\index\index.31.optimized.nocfs.zip ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Index: lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 1100748) +++ lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (working copy) @@ -22,6 +22,7 @@ import java.io.IOException; import java.io.PrintStream; import java.util.Arrays; +import java.util.ArrayList; import java.util.List; import java.util.Random; @@ -41,10 +42,12 @@ import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.Constants; /* Verify we can read the pre-4.0 file format, do searches @@ -110,6 +113,10 @@ "29.nocfs", }; + final String[] oldOptimizedNames = {"31.optimized.cfs", + "31.optimized.nocfs", + }; + /** This test checks that *only* IndexFormatTooOldExceptions are throws when you open and operate on too old indexes! 
*/ public void testUnsupportedOldIndexes() throws Exception { for(int i=0;i names = new ArrayList(oldNames.length + oldOptimizedNames.length); + names.addAll(Arrays.asList(oldNames)); + names.addAll(Arrays.asList(oldOptimizedNames)); + for(String name : names) { + if (VERBOSE) { + System.out.println("testUpgradeOldIndex: index=" +name); + } + File oldIndxeDir = _TestUtil.getTempDir(name); + _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir); + Directory dir = newFSDirectory(oldIndxeDir); + + // only use Log- or TieredMergePolicy, MockRandomMergePolicy does not behave correctly + // (it includes segments not in segmentsToMerge Set - maybe fix that?): + MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy(); + IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, null) + .setMergePolicy(mp); + new IndexFormatUpgrader(dir, iwc, VERBOSE ? System.out : null, false).upgrade(); + + checkAllSegmentsUpgraded(dir); + + _TestUtil.checkIndex(dir); + + dir.close(); + _TestUtil.rmDir(oldIndxeDir); + } + } + + public void testUpgradeOldOptimizedIndexWithAdditions() throws Exception { + for (String name : oldOptimizedNames) { + if (VERBOSE) { + System.out.println("testUpgradeOldOptimizedIndexWithAdditions: index=" +name); + } + File oldIndxeDir = _TestUtil.getTempDir(name); + _TestUtil.unzip(getDataFile("index." + name + ".zip"), oldIndxeDir); + Directory dir = newFSDirectory(oldIndxeDir); + + assertEquals("Original index must be optimized", 1, getNumberOfSegments(dir)); + + // create a bunch of dummy segments + int id = 40; + RAMDirectory ramDir = new RAMDirectory(); + for (int i = 0; i < 3; i++) { + // only use Log- or TieredMergePolicy, to make document addition predictable and not suddenly merge: + MergePolicy mp = random.nextBoolean() ? 
newLogMergePolicy() : newTieredMergePolicy(); + IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMergePolicy(mp); + IndexWriter w = new IndexWriter(ramDir, iwc); + // add few more docs: + for(int j = 0; j < RANDOM_MULTIPLIER * random.nextInt(30); j++) { + addDoc(w, id++); + } + w.close(false); + } + + // add dummy segments (which are all in current version) to optimized index + MergePolicy mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy(); + IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, null) + .setMergePolicy(mp); + IndexWriter w = new IndexWriter(dir, iwc); + w.setInfoStream(VERBOSE ? System.out : null); + w.addIndexes(ramDir); + w.close(false); + + // determine count of segments in modified index + final int origSegCount = getNumberOfSegments(dir); + + // only use Log- or TieredMergePolicy, MockRandomMergePolicy does not behave correctly + // (it includes segments not in segmentsToMerge Set - maybe fix that?): + mp = random.nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy(); + iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setMergePolicy(mp); + new IndexFormatUpgrader(dir, iwc, VERBOSE ? System.out : null, false).upgrade(); + + final int segCount = checkAllSegmentsUpgraded(dir); + assertEquals("Index must still contain the same number of segments, as only one segment was upgraded and nothing else merged", + origSegCount, segCount); + + dir.close(); + _TestUtil.rmDir(oldIndxeDir); + } + } + }