Description
One of the Solr nodes in our SolrCloud cluster was killed, which corrupted its transaction log (tlog). Now the node cannot finish recovering. It fails with the following exception:
Caused by: java.lang.IndexOutOfBoundsException: Index: 14, Size: 13
at java.util.ArrayList.RangeCheck(ArrayList.java:547)
at java.util.ArrayList.get(ArrayList.java:322)
at org.apache.solr.update.TransactionLog$LogCodec.readExternString(TransactionLog.java:128)
at org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:188)
at org.apache.solr.common.util.JavaBinCodec.readOrderedMap(JavaBinCodec.java:120)
at org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:184)
at org.apache.solr.common.util.JavaBinCodec.readArray(JavaBinCodec.java:451)
at org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:182)
at org.apache.solr.common.util.JavaBinCodec.readOrderedMap(JavaBinCodec.java:121)
at org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:184)
at org.apache.solr.common.util.JavaBinCodec.readArray(JavaBinCodec.java:451)
at org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:182)
at org.apache.solr.common.util.JavaBinCodec.readArray(JavaBinCodec.java:451)
at org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:182)
at org.apache.solr.update.TransactionLog$ReverseReader.next(TransactionLog.java:708)
at org.apache.solr.update.UpdateLog$RecentUpdates.update(UpdateLog.java:906)
at org.apache.solr.update.UpdateLog$RecentUpdates.access$000(UpdateLog.java:846)
at org.apache.solr.update.UpdateLog.getRecentUpdates(UpdateLog.java:996)
at org.apache.solr.update.UpdateLog.init(UpdateLog.java:241)
at org.apache.solr.update.UpdateHandler.initLog(UpdateHandler.java:94)
at org.apache.solr.update.UpdateHandler.<init>(UpdateHandler.java:123)
at org.apache.solr.update.DirectUpdateHandler2.<init>(DirectUpdateHandler2.java:97)
... 31 more
I checked the code in UpdateLog.java and found that only IOException is caught when the above exception happens.
private void update() { int numUpdates = 0; updateList = new ArrayList<List<Update>>(logList.size()); deleteByQueryList = new ArrayList<Update>(); deleteList = new ArrayList<DeleteUpdate>(); updates = new HashMap<Long,Update>(numRecordsToKeep); for (TransactionLog oldLog : logList) { List<Update> updatesForLog = new ArrayList<Update>(); TransactionLog.ReverseReader reader = null; try { reader = oldLog.getReverseReader(); while (numUpdates < numRecordsToKeep) { Object o = reader.next(); if (o==null) break; try { // should currently be a List<Oper,Ver,Doc/Id> List entry = (List)o; // TODO: refactor this out so we get common error handling int opAndFlags = (Integer)entry.get(0); if (latestOperation == 0) { latestOperation = opAndFlags; } int oper = opAndFlags & UpdateLog.OPERATION_MASK; long version = (Long) entry.get(1); switch (oper) { case UpdateLog.ADD: case UpdateLog.DELETE: case UpdateLog.DELETE_BY_QUERY: Update update = new Update(); update.log = oldLog; update.pointer = reader.position(); update.version = version; updatesForLog.add(update); updates.put(version, update); if (oper == UpdateLog.DELETE_BY_QUERY) { deleteByQueryList.add(update); } else if (oper == UpdateLog.DELETE) { deleteList.add(new DeleteUpdate(version, (byte[])entry.get(2))); } break; case UpdateLog.COMMIT: break; default: throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown Operation! " + oper); } } catch (ClassCastException cl) { log.warn("Unexpected log entry or corrupt log. Entry=" + o, cl); // would be caused by a corrupt transaction log } catch (Exception ex) { log.warn("Exception reverse reading log", ex); break; } } } catch (IOException e) { // failure to read a log record isn't fatal log.error("Exception reading versions from log",e); } finally { if (reader != null) reader.close(); } updateList.add(updatesForLog); } }