Index: modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiTokenizer.java
===================================================================
--- modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiTokenizer.java	(revision 1303721)
+++ modules/analysis/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiTokenizer.java	(working copy)
@@ -192,7 +192,6 @@
   }
   
   /** blast some random large strings through the analyzer */
-  @Ignore("FIXME: see LUCENE-3897")
   public void testRandomHugeStrings() throws Exception {
     checkRandomData(random, analyzer, 200*RANDOM_MULTIPLIER, 8192);
     checkRandomData(random, analyzerNoPunct, 200*RANDOM_MULTIPLIER, 8192);
Index: modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java
===================================================================
--- modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java	(revision 1303721)
+++ modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/KuromojiTokenizer.java	(working copy)
@@ -588,27 +588,71 @@
 
       if (pos - lastBackTracePos >= MAX_BACKTRACE_GAP) {
         // Safety: if we've buffered too much, force a
-        // backtrace now:
+        // backtrace now.  We find the least-cost partial
+        // path, across all paths, backtrace from it, and
+        // then prune all others.  Note that this, in
+        // general, can produce the wrong result, if the
+        // total best path did not in fact back trace
+        // through this partial best path.  But it's the
+        // best we can do... (short of not having a
+        // safety!).
+
+        // First pass: find least cost partial path so far,
+        // including ending at future positions:
         int leastIDX = -1;
         int leastCost = Integer.MAX_VALUE;
-        for(int idx=0;idx<posData.count;idx++) {
-          //System.out.println("    idx=" + idx + " cost=" + cost);
-          final int cost = posData.costs[idx];
-          if (cost < leastCost) {
-            leastCost = cost;
-            leastIDX = idx;
+        Position leastPosData = null;
+        for(int pos2=pos;pos2<positions.getNextPos();pos2++) {
+          final Position posData2 = positions.get(pos2);
+          for(int idx=0;idx<posData2.count;idx++) {
+            // Read the scanned position's own costs (posData2), not the current posData's:
+            final int cost = posData2.costs[idx];
+            if (cost < leastCost) {
+              leastCost = cost;
+              leastIDX = idx;
+              leastPosData = posData2;
+            }
           }
         }
-        backtrace(posData, leastIDX);
 
+        // We will always have at least one live path:
+        assert leastIDX != -1;
+
+        // Second pass: prune all but the best path:
+        for(int pos2=pos;pos2<positions.getNextPos();pos2++) {
+          final Position posData2 = positions.get(pos2);
+          if (posData2 != leastPosData) {
+            posData2.reset();
+          } else {
+            if (leastIDX != 0) {
+              posData2.costs[0] = posData2.costs[leastIDX];
+              posData2.lastRightID[0] = posData2.lastRightID[leastIDX];
+              posData2.backPos[0] = posData2.backPos[leastIDX];
+              posData2.backIndex[0] = posData2.backIndex[leastIDX];
+              posData2.backID[0] = posData2.backID[leastIDX];
+              posData2.backType[0] = posData2.backType[leastIDX];
+            }
+            posData2.count = 1;
+          }
+        }
+
+        backtrace(leastPosData, 0);
+
         // Re-base cost so we don't risk int overflow:
-        Arrays.fill(posData.costs, 0, posData.count, 0);
+        Arrays.fill(leastPosData.costs, 0, leastPosData.count, 0);
 
         if (pending.size() != 0) {
           return;
         } else {
           // This means the backtrace only produced
           // punctuation tokens, so we must keep parsing.
+          if (pos != leastPosData.pos) {
+            // We jumped into a future position; continue to
+            // the top of the loop to skip until we get
+            // there:
+            assert pos < leastPosData.pos;
+            continue;
+          }
         }
       }
 
Index: dev-tools/scripts/checkJavaDocs.py
===================================================================
--- dev-tools/scripts/checkJavaDocs.py	(revision 0)
+++ dev-tools/scripts/checkJavaDocs.py	(working copy)
@@ -0,0 +1,53 @@
+import sys
+import os
+import re
+
+reHREF = re.compile('<a.*?>(.*?)</a>', re.IGNORECASE)
+
+def checkPackageSummaries(root):
+  """
+  Walks the directory tree under root and, for every
+  package-summary.html found, reports each line that is just an empty
+  table cell ('<td>&nbsp;</td>' or '<td></td>', ignoring case and
+  surrounding whitespace), labelled with the link text found on the
+  line before it.  Returns True if there are problems.
+  """
+
+  print
+  print 'Check...'
+  anyMissing = False
+  for dirPath, dirNames, fileNames in os.walk(root):
+    if 'package-summary.html' in fileNames:
+      fullPath = '%s/package-summary.html' % dirPath
+      printed = False
+      f = open(fullPath)
+      try:
+        # Start from '' rather than None so a blank cell on the very
+        # first line can still be searched:
+        lastLine = ''
+        for line in f.readlines():
+          lineLower = line.strip().lower()
+          if lineLower in ('<td>&nbsp;</td>', '<td></td>'):
+            m = reHREF.search(lastLine)
+            if not printed:
+              print
+              print fullPath
+              printed = True
+            if m is not None:
+              print '  missing: %s' % m.group(1)
+            else:
+              # No link on the previous line; show that line itself
+              # instead of failing on m.group():
+              print '  missing: %s' % lastLine.strip()
+            anyMissing = True
+          lastLine = line
+      finally:
+        # Close the file even if reading or matching raises:
+        f.close()
+
+  return anyMissing
+
+if __name__ == '__main__':
+  # Exit non-zero when problems were found so callers can detect it:
+  if checkPackageSummaries(sys.argv[1]):
+    sys.exit(1)

Property changes on: dev-tools/scripts/checkJavaDocs.py
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
Index: dev-tools/scripts/smokeTestRelease.py
===================================================================
--- dev-tools/scripts/smokeTestRelease.py	(revision 1303721)
+++ dev-tools/scripts/smokeTestRelease.py	(working copy)
@@ -26,6 +26,7 @@
 import xml.etree.ElementTree as ET
 import filecmp
 import platform
+import checkJavaDocs
 
 # This tool expects to find /lucene and /solr off the base URL.  You
 # must have a working gpg, tar, unzip in your path.  This has been
@@ -243,7 +244,7 @@
     # NOTE: O(N) but N should be smallish:
     if 'CHANGES.txt' in files:
       fullPath = '%s/CHANGES.txt' % root
-      print 'CHECK %s' % fullPath
+      #print 'CHECK %s' % fullPath
       checkChangesContent(open(fullPath).read(), version, fullPath, project, False)
       
 def checkChangesContent(s, version, name, project, isHTML):
@@ -383,6 +384,9 @@
       # test javadocs
       print '    generate javadocs w/ Java 5...'
       run('export JAVA_HOME=%s; ant javadocs' % JAVA5_HOME, '%s/javadocs.log' % unpackPath)
+      if checkJavaDocs.checkPackageSummaries('build/docs/api'):
+        raise RuntimeError('javadoc summaries failed')
+      
     else:
       print '    run tests w/ Java 6...'
       run('export JAVA_HOME=%s; ant test' % JAVA6_HOME, '%s/test.log' % unpackPath)
