Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 908159)
+++ CHANGES.txt (working copy)
@@ -27,6 +27,10 @@
* LUCENE-2158: At high indexing rates, NRT reader could temporarily
lose deletions. (Mike McCandless)
+ * LUCENE-2257: Increase the maximum number of unique terms in one
+ segment from ~2.1 billion to termIndexInterval (default 128) *
+ ~2.1 billion = ~274 billion. (Tom Burton-West via Mike McCandless)
+
API Changes
* LUCENE-2182: DEFAULT_ATTRIBUTE_FACTORY was failing to load
Index: src/java/org/apache/lucene/index/SegmentTermEnum.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentTermEnum.java (revision 908410)
+++ src/java/org/apache/lucene/index/SegmentTermEnum.java (working copy)
@@ -108,7 +108,7 @@
return clone;
}
- final void seek(long pointer, int p, Term t, TermInfo ti)
+ final void seek(long pointer, long p, Term t, TermInfo ti)
throws IOException {
input.seek(pointer);
position = p;
Index: src/java/org/apache/lucene/index/TermInfosReader.java
===================================================================
--- src/java/org/apache/lucene/index/TermInfosReader.java (revision 908410)
+++ src/java/org/apache/lucene/index/TermInfosReader.java (working copy)
@@ -170,7 +170,7 @@
private final void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException {
enumerator.seek(indexPointers[indexOffset],
- (indexOffset * totalIndexInterval) - 1,
+ ((long) indexOffset * totalIndexInterval) - 1,
indexTerms[indexOffset], indexInfos[indexOffset]);
}
@@ -241,28 +241,6 @@
return ti;
}
- /** Returns the nth term in the set. */
- final Term get(int position) throws IOException {
- if (size == 0) return null;
-
- SegmentTermEnum enumerator = getThreadResources().termEnum;
- if (enumerator.term() != null &&
- position >= enumerator.position &&
- position < (enumerator.position + totalIndexInterval))
- return scanEnum(enumerator, position); // can avoid seek
-
- seekEnum(enumerator, position/totalIndexInterval); // must seek
- return scanEnum(enumerator, position);
- }
-
- private final Term scanEnum(SegmentTermEnum enumerator, int position) throws IOException {
- while(enumerator.position < position)
- if (!enumerator.next())
- return null;
-
- return enumerator.term();
- }
-
private void ensureIndexIsRead() {
if (indexTerms == null) {
throw new IllegalStateException("terms index was not loaded when this reader was created");
Index: src/site/src/documentation/content/xdocs/fileformats.xml
===================================================================
--- src/site/src/documentation/content/xdocs/fileformats.xml (revision 908159)
+++ src/site/src/documentation/content/xdocs/fileformats.xml (working copy)
@@ -1845,11 +1845,12 @@
When referring to term numbers, Lucene's current
- implementation uses a Java int, which means
- the maximum number of unique terms in any single index
- segment is 2,147,483,648. This is technically not a
- limitation of the index file format, just of Lucene's
- current implementation.
+ implementation uses a Java int to hold the
+ term index, which means the maximum number of unique
+ terms in any single index segment is ~2.1 billion times
+ the term index interval (default 128) = ~274 billion.
+ This is technically not a limitation of the index file
+ format, just of Lucene's current implementation.
Similarly, Lucene uses a Java int to refer
Index: docs/fileformats.pdf
===================================================================
--- docs/fileformats.pdf (revision 908159)
+++ docs/fileformats.pdf (working copy)
@@ -667,10 +667,10 @@
>>
endobj
111 0 obj
-<< /Length 1801 /Filter [ /ASCII85Decode /FlateDecode ]
+<< /Length 1845 /Filter [ /ASCII85Decode /FlateDecode ]
>>
stream
-Gatm<>E@c%'Ro4HjK13fG\lPL)aB'A"VfQS_N=$kG,NXdH;*]f';idMD;tC&-V:5k;U.'/tp@-$LTH7^Q,1p?N)YM*ArPAGr!8(0!,&.e*ZcV$F&^;F6Ba=Bj-erc@iWHmO",>o\fW2U#KJ8%,]-0<;4\qcMW)dW(W4r')L'%gC?hm*f`HWnV%dbVZG#-_jUj#'P\O_oZ'&jIQr]%54&Rn#K1_F/7O2B740fG=lc[4Ro3YmceBq7Jn539+CM0NV+[Zoel+/JaFIhGpmP4I%\.R[ZC5-ce+e*fLJ4fAT\@3QgGc1u-7W7h"cbCre?E8TM&At8O!?\Zsm0q0)\(>&=We,.h1o`pZE7qOk1rFOCj3(%1n;J"B0i&M[BePZMTY8(,j_P+]_P5I?\J/(K8Z6DjQ_^H`SEO/Y">"_S/9Z@,81;O;nec;K.Sh35ViII!^N'h`d&Fa!oTbnAF-)SW:(h*^RW!ZkFMnf4=9B6Til(FqYcoO?tniTMb>_+7;LbV\NtQAdON4g_="n/dHr.9:I`img>pmXGCoL.5dT;_%U,hKC-2."mArlK#2QtTjDcU^OZijUGX1BS&fM_IqtMc4YW2pchD7rNX.bkiX:CO+B#XE_l!(M&o8,(lC&:VFoWIf2JGO'0#O'r5)PBdF3_]8*R*2#c0Gb-m"MMVl"4r1ri%&PJjQZZUW)F>7c#SGN$u)DUp-W7j\O%/N"t%)+rC6*J5b&i/rg?gf)qQ>0oC<>?l$4CDGTap@Wn1#_`e#([onhm(7l/DLQ].K%&"i%@qog]SY54_K@#]&\CdSEi2a6_A-\ptml007OI2_pGssbFA@6[laqdrV2`R)'V"YX4fS*\3f>,V>4#L01afe)u\m0--fn,sQ)-]FFs,o^4H.qb\/+2kb`6l+lrN;Y4Gh8JS#)F1%g,CT;[NS:Jn>_(cEbKaGProLPjEZK-n>=?sW2b%F]e/'&0&"]e\7%IOAG78TS[DN8C+WmlWBfB"=LHK?[dUc'ujV5J\KMr?G`JG;6k+/'3XN&F5`o:W;koaM4seWJhHt0u4mEMOr5Bo>0`iiqk,6HmeKLq4s5I7W&`*]&'g1*_%_OUd!0NMK,:Za1Fo%!78.6-^dpe7i'q*bg[A:V/te//^*dZeQmd3b+3">0,"]'IfX.'6n&~>
+Gatm<>BAg]'RnB3n/LcAW02O@J/Pq745JTKec65F#:5KGZ*oni-#N@,hi;M#d?,Yd>@9L+Eb+E>^2oQXAs9^1ai=_XQ0'^&@dco^idiB%[<2Vm-.F?@O`+.)LNo/%e)!DF@c9H;iPf\X6.,28n**]:&i5-/YXjesG6chX''KWD??0@/BgBgPg/i+("o=j2NX(P0'ODl8@)CV8'q/0JJ'qT,2dZj@eRhl2)eG5hql,[U'&GA5g]tBkWd"ht*C]:.X,IJD:$Y8*\o"9@*4#=aI`*i2[+QM-hWkQX:B7;*(I#aIE4Y-7]I@hlm_I=Ys^3AIVYCG^U?e;aZ/$Qjg2,^d?/3#mjXsjHcBla:tk5Zc1Wp9=8F?5!XlfGL:G5q0gNL#ZR+WL!h;L)l;MY.r[`SLg6)`G/k_:L[KZnU*72gF,.XiE3`m5#7f1@JS.WIo@efS#26'!,qgh`+lfY'KqWP4WnNG$TdmPA&P1bNXJEpZ\B5_g1KufB?AQgcV/oL3562M1$jDUZ1WCYN83`gae1`4ETh\sT\c[KpZmarCa$tR*@=Y;>DfdgNV-pfL=BX>945KHrX\,OeJpGQrNP66?Z4=7n(UJ]F!FJr.#+@(,[$AS,(A(1%\_5*=SV2gop=(.1,XC28DF:0,;X?n:1j]'Wb.SUFW*)2i<_uNVVVkC$3J`Vrg?RV1/(db)*gG\kW84OJT(0*+D9"Le6*KPe'UFCGc%8Uf"`,Uc=9Bb\NVp6YGICZ%"(U$^I)i[3O21F*H=B\^-/',Lfd)SVJd[/QeTj/7!KC?b2Sr8-R&2$!%f9\.Z;KjY/\pF]/`6V/E$iU0aau[6U,bNl>lkro4uNKp7S4\bE2qFEc1sV3VnHsGg=oj%ebEhM2anjEk9A5(cdng-*M"4t5uKquQQ_a69:iC8#Ds(]^mtB.WYGadC0Adc)WACId3qH"_<63NQG`$BeI4^mk/[+:Hd>#e@!eWJq1$^qlA*I$M'_Vg+Q_O?7#,E2C?@^ATK_ZF85?IOS2(1O_eSX_NLU4h_NH#b`4qeD:hJW!qA#HkL"n)KBH@-.LpP>=ki2,O,5a/`l#J;k\mu>9-Z2""?/*82.L2pc7M[%J+,^%=DgZ1dXbF=T#uc3+nuNVAp&*oI>jo`AQCR[td7;TqaI0,q(ah.jdG(2!m_[[pb3OpT`#>323Wj*LEW=Faj\2j3$aQbKW?iCt:Ss^4Pb5'-*c,geVDUOr:]L=E^(LLf
endstream
endobj
112 0 obj
@@ -1206,80 +1206,80 @@
xref
0 153
0000000000 65535 f
-0000054446 00000 n
-0000054652 00000 n
-0000054745 00000 n
+0000054490 00000 n
+0000054696 00000 n
+0000054789 00000 n
0000000015 00000 n
0000000071 00000 n
0000001333 00000 n
0000001453 00000 n
0000001639 00000 n
-0000054897 00000 n
+0000054941 00000 n
0000001774 00000 n
-0000054960 00000 n
+0000055004 00000 n
0000001909 00000 n
-0000055026 00000 n
+0000055070 00000 n
0000002046 00000 n
-0000055090 00000 n
+0000055134 00000 n
0000002183 00000 n
-0000055156 00000 n
+0000055200 00000 n
0000002320 00000 n
-0000055222 00000 n
+0000055266 00000 n
0000002457 00000 n
-0000055288 00000 n
+0000055332 00000 n
0000002594 00000 n
-0000055352 00000 n
+0000055396 00000 n
0000002731 00000 n
-0000055416 00000 n
+0000055460 00000 n
0000002868 00000 n
-0000055482 00000 n
+0000055526 00000 n
0000003005 00000 n
-0000055547 00000 n
+0000055591 00000 n
0000003142 00000 n
-0000055613 00000 n
+0000055657 00000 n
0000003279 00000 n
-0000055679 00000 n
+0000055723 00000 n
0000003416 00000 n
-0000055744 00000 n
+0000055788 00000 n
0000003553 00000 n
-0000055810 00000 n
+0000055854 00000 n
0000003690 00000 n
-0000055874 00000 n
+0000055918 00000 n
0000003826 00000 n
-0000055938 00000 n
+0000055982 00000 n
0000003963 00000 n
-0000056004 00000 n
+0000056048 00000 n
0000004100 00000 n
-0000056070 00000 n
+0000056114 00000 n
0000004237 00000 n
-0000056135 00000 n
+0000056179 00000 n
0000004373 00000 n
-0000056201 00000 n
+0000056245 00000 n
0000004510 00000 n
-0000056265 00000 n
+0000056309 00000 n
0000004647 00000 n
-0000056331 00000 n
+0000056375 00000 n
0000004783 00000 n
-0000056397 00000 n
+0000056441 00000 n
0000004920 00000 n
0000005673 00000 n
0000005796 00000 n
0000005872 00000 n
-0000056461 00000 n
+0000056505 00000 n
0000006004 00000 n
-0000056527 00000 n
+0000056571 00000 n
0000006137 00000 n
-0000056591 00000 n
+0000056635 00000 n
0000006270 00000 n
-0000056656 00000 n
+0000056700 00000 n
0000006403 00000 n
-0000056721 00000 n
+0000056765 00000 n
0000006536 00000 n
-0000056786 00000 n
+0000056830 00000 n
0000006669 00000 n
-0000056851 00000 n
+0000056895 00000 n
0000006801 00000 n
-0000056916 00000 n
+0000056960 00000 n
0000006934 00000 n
0000009083 00000 n
0000009191 00000 n
@@ -1317,47 +1317,47 @@
0000043725 00000 n
0000045456 00000 n
0000045566 00000 n
-0000047461 00000 n
-0000056981 00000 n
-0000047571 00000 n
-0000047771 00000 n
-0000047989 00000 n
-0000048195 00000 n
-0000048403 00000 n
-0000048571 00000 n
-0000048771 00000 n
-0000048929 00000 n
-0000049104 00000 n
-0000049367 00000 n
-0000049608 00000 n
-0000049737 00000 n
-0000049891 00000 n
-0000050045 00000 n
-0000050189 00000 n
-0000050339 00000 n
-0000050480 00000 n
-0000050715 00000 n
-0000050910 00000 n
-0000051150 00000 n
-0000051332 00000 n
-0000051505 00000 n
-0000051708 00000 n
-0000051896 00000 n
-0000052148 00000 n
-0000052289 00000 n
-0000052498 00000 n
-0000052684 00000 n
-0000052858 00000 n
-0000053103 00000 n
-0000053294 00000 n
-0000053500 00000 n
-0000053666 00000 n
-0000053780 00000 n
-0000053891 00000 n
-0000054003 00000 n
-0000054112 00000 n
-0000054219 00000 n
-0000054336 00000 n
+0000047505 00000 n
+0000057025 00000 n
+0000047615 00000 n
+0000047815 00000 n
+0000048033 00000 n
+0000048239 00000 n
+0000048447 00000 n
+0000048615 00000 n
+0000048815 00000 n
+0000048973 00000 n
+0000049148 00000 n
+0000049411 00000 n
+0000049652 00000 n
+0000049781 00000 n
+0000049935 00000 n
+0000050089 00000 n
+0000050233 00000 n
+0000050383 00000 n
+0000050524 00000 n
+0000050759 00000 n
+0000050954 00000 n
+0000051194 00000 n
+0000051376 00000 n
+0000051549 00000 n
+0000051752 00000 n
+0000051940 00000 n
+0000052192 00000 n
+0000052333 00000 n
+0000052542 00000 n
+0000052728 00000 n
+0000052902 00000 n
+0000053147 00000 n
+0000053338 00000 n
+0000053544 00000 n
+0000053710 00000 n
+0000053824 00000 n
+0000053935 00000 n
+0000054047 00000 n
+0000054156 00000 n
+0000054263 00000 n
+0000054380 00000 n
trailer
<<
/Size 153
@@ -1365,5 +1365,5 @@
/Info 4 0 R
>>
startxref
-57035
+57079
%%EOF
Index: docs/fileformats.html
===================================================================
--- docs/fileformats.html (revision 908159)
+++ docs/fileformats.html (working copy)
@@ -2547,11 +2547,12 @@
When referring to term numbers, Lucene's current
- implementation uses a Java int, which means
- the maximum number of unique terms in any single index
- segment is 2,147,483,648. This is technically not a
- limitation of the index file format, just of Lucene's
- current implementation.
+ implementation uses a Java int to hold the
+ term index, which means the maximum number of unique
+ terms in any single index segment is ~2.1 billion times
+ the term index interval (default 128) = ~274 billion.
+ This is technically not a limitation of the index file
+ format, just of Lucene's current implementation.
Similarly, Lucene uses a Java int to refer