Index: src/java/org/apache/lucene/index/CheckIndex.java =================================================================== --- src/java/org/apache/lucene/index/CheckIndex.java (revision 698267) +++ src/java/org/apache/lucene/index/CheckIndex.java (working copy) @@ -31,22 +31,162 @@ import java.util.ArrayList; /** - * Basic tool to check the health of an index and write a - * new segments file that removes reference to problematic - * segments. There are many more checks that this tool - * could do but does not yet, eg: reconstructing a segments - * file by looking for all loadable segments (if no segments - * file is found), removing specifically specified segments, - * listing files that exist but are not referenced, etc. + * Basic tool and API to check the health of an index and + * write a new segments file that removes reference to + * problematic segments. + * + *

WARNING: this API is new and experimental and is + * subject to sudden change in the next release. */ - public class CheckIndex { + /** + * Set this to see messages as the tool is checking + * @deprecated Please instantiate this class and set the output stream with {@link #setInfoStream} + */ public static PrintStream out = null; + /** Returns true if index is clean, else false. + * @deprecated Please use {@link #checkIndex()} + * from a class instance instead */ + public static boolean check(Directory dir, boolean doFix) throws IOException { + return check(dir, doFix, null); + } + + /** Returns true if index is clean, else false. Messages are sent to {@link #out} if it is set. + * @deprecated Please use {@link #checkIndex(List)} + * followed by {@link #fix()} from a class instance instead */ + public static boolean check(Directory dir, boolean doFix, List onlySegments) throws IOException { + CheckIndex c=new CheckIndex(dir); c.setInfoStream(out); /* keep honoring the deprecated static stream */ + final boolean ok=c.checkIndex(onlySegments); + if (doFix && !ok) c.fix(); + return ok; + } + + /** Create an instance of CheckIndex, that checks index in given directory. **/ + public CheckIndex(final Directory dir) { + this.dir=dir; + } + + /** Sends output of this tool to the supplied PrintStream. **/ + public void setInfoStream(PrintStream stream) { + this.infoStream=stream; + } + + private void msg(String msg) { + if (infoStream != null) { + infoStream.println(msg); + } + } + + private PrintStream infoStream=null; + + /** Directory index is in. */ + public Directory dir; + + /** True if no problems were found with the index. */ + public boolean clean=false; + + /** True if we were unable to locate and load the segments_N file. */ + public boolean missingSegments=false; + + /** True if we were unable to open the segments_N file. */ + public boolean cantOpenSegments=false; + + /** True if we were unable to read the version number from segments_N file. */ + public boolean missingSegmentVersion=false; + + /** Name of latest segments_N file in the index. 
*/ + public String segmentsFileName=null; + + /** Number of segments in the index. */ + public int numSegments=0; + + /** String description of the version of the index. */ + public String segmentFormat=null; + + /** Empty unless you passed specific segments list to check as optional 3rd argument. + * @see CheckIndex#checkIndex(Directory, boolean, List) */ + public List/**/ segmentsChecked = new ArrayList(); + + /** True if the index was created with a newer version of Lucene than the CheckIndex tool. */ + public boolean toolOutOfDate=false; + + /** List of {@link SegmentInfoStatus} instances, detailing status of each segment. */ + public List/*WARNING: this API is new and experimental and is + * subject to suddenly change in the next release. + */ + public static class SegmentInfoStatus { + /** Name of the segment. */ + public String name=null; + + /** Document count (does not take deletions into account). */ + public int docCount=0; + + /** True if segment is compound file format. */ + public boolean compound=false; + + /** Number of files referenced by this segment. */ + public int numFiles=0; + + /** Net size (MB) of the files referenced by this + * segment. */ + public double sizeMB=0.0; + + /** Doc store offset, if this segment shares the doc + * store files (stored fields and term vectors) with + * other segments. This is -1 if it does not share. */ + public int docStoreOffset = -1; + + /** String of the shared doc store segment, or null if + * this segment does not share the doc store files. */ + public String docStoreSegment=null; + + /** True if the shared doc store files are compound file + * format. */ + public boolean docStoreCompoundFile=false; + + /** True if this segment has pending deletions. */ + public boolean hasDeletions=false; + + /** Name of the current deletions file name. */ + public String deletionsFileName=null; + + /** Number of deleted documents. 
*/ + public int numDeleted=0; + + /** True if we were able to open a SegmentReader on this + * segment. */ + public boolean openReaderPassed=false; + + /** Number of fields in this segment. */ + int numFields=0; + + /** True if at least one of the fields in this segment + * does not omitTf. + * @see Fieldable#setOmitTf */ + public boolean hasProx=false; + } + private static class MySegmentTermDocs extends SegmentTermDocs { - int delCount; + int delCount=0; MySegmentTermDocs(SegmentReader p) { super(p); @@ -62,24 +202,32 @@ } } - /** Returns true if index is clean, else false.*/ - public static CheckIndexStatus check(Directory dir, boolean doFix) throws IOException { - return check(dir, doFix, null); + /** Returns true if the index is clean, + * else false. + * + *

WARNING: make sure + * you only call this when the index is not opened by any + * writer. */ + public boolean checkIndex() throws IOException { + return checkIndex(null); } - /** Returns true if index is clean, else false.*/ - public static CheckIndexStatus check(Directory dir, boolean doFix, List onlySegments) throws IOException { + /** Returns true if the index is clean, + * else false. + * + *

WARNING: make sure + * you only call this when the index is not opened by any + * writer. */ + public boolean checkIndex(List onlySegments) throws IOException { NumberFormat nf = NumberFormat.getInstance(); SegmentInfos sis = new SegmentInfos(); - CheckIndexStatus result = new CheckIndexStatus(); - result.dir = dir; try { sis.read(dir); } catch (Throwable t) { msg("ERROR: could not read any segments file in directory"); - result.missingSegments = true; - t.printStackTrace(out); - return result; + missingSegments = true; + if (infoStream!=null) t.printStackTrace(infoStream); + return false; } final int numSegments = sis.size(); @@ -89,18 +237,18 @@ input = dir.openInput(segmentsFileName); } catch (Throwable t) { msg("ERROR: could not open segments file in directory"); - t.printStackTrace(out); - result.cantOpenSegments = true; - return result; + if (infoStream!=null) t.printStackTrace(infoStream); + cantOpenSegments = true; + return false; } int format = 0; try { format = input.readInt(); } catch (Throwable t) { msg("ERROR: could not read segment file version in directory"); - t.printStackTrace(out); - result.missingSegmentVersion = true; - return result; + if (infoStream!=null) t.printStackTrace(infoStream); + missingSegmentVersion = true; + return false; } finally { if (input != null) input.close(); @@ -133,36 +281,36 @@ } msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat); - result.segmentsFileName = segmentsFileName; - result.numSegments = numSegments; - result.segmentFormat = sFormat; + this.segmentsFileName = segmentsFileName; + this.numSegments = numSegments; + segmentFormat = sFormat; - if (onlySegments != null) { - out.print("\nChecking only these segments:"); + if (infoStream!=null && onlySegments != null) { + infoStream.print("\nChecking only these segments:"); Iterator it = onlySegments.iterator(); while (it.hasNext()) { - out.print(" " + it.next()); + infoStream.print(" " + it.next()); } - 
result.segmentsChecked.addAll(onlySegments); + segmentsChecked.addAll(onlySegments); msg(":"); } if (skip) { msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); - result.toolOutOfDate = true; - return result; + toolOutOfDate = true; + return false; } - result.newSegments = (SegmentInfos) sis.clone(); - result.newSegments.clear(); + newSegments = (SegmentInfos) sis.clone(); + newSegments.clear(); for(int i=0;iWARNING: this writes a new - * segments file into the index, effectively removing - * all documents in broken segments from the index. BE - * CAREFUL. */ - static public void fix(CheckIndexStatus result) throws IOException { - result.newSegments.commit(result.dir); + /** Repairs the index using previously returned result + * from {@link #checkIndex}. + * + *

WARNING: this writes a + * new segments file into the index, effectively removing + * all documents in broken segments from the index. + * BE CAREFUL. + * + *

WARNING: Make sure you only call this when the + * index is not opened by any writer. */ + public void fix() throws IOException { + if (newSegments==null) throw new IllegalStateException("You must check first."); + newSegments.commit(dir); } - static boolean assertsOn; - + static boolean assertsOn=false; private static boolean testAsserts() { - assertsOn = true; - return true; + assert assertsOn = true; + return assertsOn; } - private static void msg(String msg) { + private static void smsg(String msg) { if (out != null) { out.println(msg); } } - + public static void main(String[] args) throws Throwable { - + out=System.err; // set output stream for console to stderr boolean doFix = false; List onlySegments = new ArrayList(); String indexPath = null; @@ -380,14 +528,14 @@ i++; } else if (args[i].equals("-segment")) { if (i == args.length-1) { - msg("ERROR: missing name for -segment option"); + smsg("ERROR: missing name for -segment option"); System.exit(1); } onlySegments.add(args[i+1]); i += 2; } else { if (indexPath != null) { - msg("ERROR: unexpected extra argument '" + args[i] + "'"); + smsg("ERROR: unexpected extra argument '" + args[i] + "'"); System.exit(1); } indexPath = args[i]; @@ -396,8 +544,8 @@ } if (indexPath == null) { - msg("\nERROR: index path not specified"); - msg("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" + + smsg("\nERROR: index path not specified"); + smsg("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" + "\n" + " -fix: actually write a new segments_N file, removing any problematic segments\n" + " -segment X: only check the specified segments. 
This can be specified multiple\n" + @@ -423,32 +571,33 @@ if (onlySegments.size() == 0) onlySegments = null; else if (doFix) { - msg("ERROR: cannot specify both -fix and -segment"); + smsg("ERROR: cannot specify both -fix and -segment"); System.exit(1); } assert testAsserts(); if (!assertsOn) - msg("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene', so assertions are enabled"); + smsg("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene', so assertions are enabled"); - msg("\nOpening index @ " + indexPath + "\n"); + smsg("\nOpening index @ " + indexPath + "\n"); Directory dir = null; try { dir = FSDirectory.getDirectory(indexPath); } catch (Throwable t) { - msg("ERROR: could not open directory \"" + indexPath + "\"; exiting"); + smsg("ERROR: could not open directory \"" + indexPath + "\"; exiting"); t.printStackTrace(out); System.exit(1); } - CheckIndexStatus result = check(dir, doFix, onlySegments); + CheckIndex checker = new CheckIndex(dir); + checker.setInfoStream(out); - if (!result.clean) { - if (!doFix){ - msg("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n"); + if (!checker.checkIndex(onlySegments)) { + if (!doFix) { + smsg("WARNING: would write new segments file, and " + checker.totLoseDocCount + " documents would be lost, if -fix were specified\n"); } else { - msg("WARNING: " + result.totLoseDocCount + " documents will be lost\n"); - msg("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!"); + smsg("WARNING: " + checker.totLoseDocCount + " documents will be lost\n"); + smsg("NOTE: will write new segments file in 5 seconds; this will remove " + checker.totLoseDocCount + " docs from the index. 
THIS IS YOUR LAST CHANCE TO CTRL+C!"); for(int s=0;s<5;s++) { try { Thread.sleep(1000); @@ -457,18 +606,18 @@ s--; continue; } - msg(" " + (5-i) + "..."); + smsg(" " + (5-s) + "..."); /* count down with the loop variable s; i is the argument index */ } - msg("Writing..."); - CheckIndex.fix(result); + smsg("Writing..."); + checker.fix(); } - msg("OK"); - msg("Wrote new segments file \"" + result.newSegments.getCurrentSegmentFileName() + "\""); + smsg("OK"); + smsg("Wrote new segments file \"" + checker.newSegments.getCurrentSegmentFileName() + "\""); } - msg(""); + smsg(""); final int exitCode; - if (result != null && result.clean == true) + if (checker != null && checker.clean == true) exitCode = 0; else exitCode = 1; Index: src/java/org/apache/lucene/index/CheckIndexStatus.java =================================================================== --- src/java/org/apache/lucene/index/CheckIndexStatus.java (revision 698267) +++ src/java/org/apache/lucene/index/CheckIndexStatus.java (working copy) @@ -1,74 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.lucene.store.Directory; - -import java.util.List; -import java.util.ArrayList; - - -/** - * - * - **/ -public class CheckIndexStatus { - - public boolean clean; - - - public boolean missingSegments; - public boolean cantOpenSegments; - public boolean missingSegmentVersion; - - - public String segmentsFileName; - public int numSegments; - public String segmentFormat; - public List/**/ segmentsChecked = new ArrayList(); - - public boolean toolOutOfDate; - - public List/*