Index: lucene/facet/src/test/org/apache/lucene/facet/FacetTestUtils.java
===================================================================
--- lucene/facet/src/test/org/apache/lucene/facet/FacetTestUtils.java	(revision 1538103)
+++ lucene/facet/src/test/org/apache/lucene/facet/FacetTestUtils.java	(working copy)
@@ -23,13 +23,17 @@
 public class FacetTestUtils {
 
   public static String toSimpleString(FacetResult fr) {
+    return toSimpleString(fr, fr.getFacetRequest().categoryPath.length);
+  }
+
+  public static String toSimpleString(FacetResult fr, int startLength) {
     StringBuilder sb = new StringBuilder();
-    toSimpleString(fr.getFacetRequest().categoryPath.length, 0, sb, fr.getFacetResultNode(), "");
+    toSimpleString(startLength, 0, sb, fr.getFacetResultNode(), "");
     return sb.toString();
   }
 
   private static void toSimpleString(int startLength, int depth, StringBuilder sb, FacetResultNode node, String indent) {
-    sb.append(indent + node.label.components[startLength+depth-1] + " (" + (int) node.value + ")\n");
+    sb.append(indent + (node.label == null ? "null" : node.label.components[startLength+depth-1]) + " (" + (int) node.value + ")\n");
     for (FacetResultNode childNode : node.subResults) {
       toSimpleString(startLength, depth + 1, sb, childNode, indent + " ");
     }
Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestEnumFacetsAccumulator.java
===================================================================
--- lucene/facet/src/test/org/apache/lucene/facet/search/TestEnumFacetsAccumulator.java	(revision 0)
+++ lucene/facet/src/test/org/apache/lucene/facet/search/TestEnumFacetsAccumulator.java	(working copy)
@@ -0,0 +1,399 @@
+package org.apache.lucene.facet.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.facet.FacetTestCase; +import org.apache.lucene.facet.FacetTestUtils; +import org.apache.lucene.facet.index.FacetFields; +import org.apache.lucene.facet.params.FacetSearchParams; +import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util._TestUtil; + +// nocommit would be nice to have some more test infra here, +// so that we can e.g. randomize the facet method used for +// each test case ... we have so many now (taxo index, sorted +// set, enum)... + +public class TestEnumFacetsAccumulator extends FacetTestCase { + + public void testBasic() throws Exception { + // nocommit awkward to pass null for TW ... should we + // make a separate EnumFacetFields? + FacetFields facetFields = new FacetFields(null); + + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + facetFields.addEnumFields(doc, Collections.singletonList(new CategoryPath("field", "a"))); + w.addDocument(doc); + + doc = new Document(); + facetFields.addEnumFields(doc, Collections.singletonList(new CategoryPath("field", "b", "c"))); + w.addDocument(doc); + + doc = new Document(); + facetFields.addEnumFields(doc, Collections.singletonList(new CategoryPath("field", "b", "d"))); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + + List requests = Collections.singletonList(new CountFacetRequest(new CategoryPath("field"), 10)); + + FacetsCollector c = FacetsCollector.create(new EnumFacetsAccumulator(new FacetSearchParams(requests))); + IndexSearcher s = newSearcher(r, false); + s.search(new MatchAllDocsQuery(), c); + List results = c.getFacetResults(); + assertEquals(1, results.size()); + assertEquals("field (3)\n b (2)\n a (1)\n", FacetTestUtils.toSimpleString(results.get(0))); + + // Prefix: + requests = Collections.singletonList(new CountFacetRequest(new CategoryPath("field", "b"), 10)); + + c = FacetsCollector.create(new EnumFacetsAccumulator(new FacetSearchParams(requests))); + s = newSearcher(r, false); + s.search(new MatchAllDocsQuery(), c); + results = c.getFacetResults(); + assertEquals(1, results.size()); + assertEquals("b (2)\n c (1)\n d (1)\n", FacetTestUtils.toSimpleString(results.get(0))); + r.close(); + dir.close(); + } + + // Test when one dim is a prefix of another + public void testDimPrefix() throws Exception { + // nocommit awkward to pass null for TW ... should we + // make a separate EnumFacetFields? 
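+    // Passing a null TaxonomyWriter is fine for this accumulator: addEnumFields
+    // only adds plain StringField terms to the document and never writes to a
+    // taxonomy index.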
+ FacetFields facetFields = new FacetFields(null); + + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + List paths = new ArrayList(); + paths.add(new CategoryPath("field", "a")); + paths.add(new CategoryPath("field2", "a")); + facetFields.addEnumFields(doc, paths); + w.addDocument(doc); + + IndexReader r = w.getReader(); + w.close(); + + List requests = Collections.singletonList(new CountFacetRequest(new CategoryPath("field"), 10)); + + FacetsCollector c = FacetsCollector.create(new EnumFacetsAccumulator(new FacetSearchParams(requests))); + IndexSearcher s = newSearcher(r, false); + s.search(new MatchAllDocsQuery(), c); + List results = c.getFacetResults(); + assertEquals(1, results.size()); + assertEquals("field (1)\n a (1)\n", FacetTestUtils.toSimpleString(results.get(0))); + r.close(); + dir.close(); + } + + // fold into DS test + + // thread safety + + // nocommit nrt test + + // nocommit drill down test + + private static byte getToken() { + byte upto = 0; + while (upto < Byte.MAX_VALUE) { + if (random().nextDouble() > 0.75) { + return upto; + } + upto++; + } + + // Unlikely: + return upto; + } + + private Document getRandomDocument(FacetFields facetFields, byte[] facetField, byte[][] facetField2, byte[] contentField, int id, String[] labels) throws IOException { + facetField[id] = getToken(); + contentField[id] = getToken(); + + // How deep to make the hierarchical facet field: + int depth = _TestUtil.nextInt(random(), 2, 5); + facetField2[id] = new byte[depth]; + for(int j=0;j paths = new ArrayList(); + paths.add(new CategoryPath("facet", labels[facetField[id]])); + String[] hier = new String[facetField2[id].length+1]; + hier[0] = "facet2"; + for(int i=0;i> getTopNByCountThenLabel(Map counts, final String[] labels, int numFacets) { + + ArrayList> countsList = new ArrayList>(counts.entrySet()); + Collections.sort(countsList, new Comparator>() { + @Override + public int compare(Map.Entry a, Map.Entry b) { + // First by count, descending: + int cmp = b.getValue().intValue() - a.getValue().intValue(); + if (cmp != 0) { + return cmp; + } + + // Then by alpha-sort of the label, ascending: + return labels[a.getKey()].compareTo(labels[b.getKey()]); + } + }); + + if (countsList.size() > numFacets) { + countsList.subList(numFacets, countsList.size()).clear(); + } + + return countsList; + } + + public void testRandom() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + + // nocommit awkward to pass null for TW ... should we + // make a separate EnumFacetFields? 
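+    // Test plan: index random docs with a flat facet field, a hierarchical
+    // facet field and a content field, then compare EnumFacetsAccumulator's
+    // top-N against a brute-force count (see getTopNByCountThenLabel above).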
+ FacetFields facetFields = new FacetFields(null); + + // nocommit + int numDocs = atLeast(10000); + + // TODO: multi-valued + + // flat facet field + byte[] facetField = new byte[numDocs]; + + // hierarchical facet field + byte[][] facetField2 = new byte[numDocs][]; + + // single-valued content field + byte[] contentField = new byte[numDocs]; + + // Labels we use for the facet fields (keyed by + // facetField[i] and facetField2[i][j]): + final String[] labels = new String[Byte.MAX_VALUE]; + Set seen = new HashSet(); + for(int i=0;i requests = new ArrayList(); + + int numFacets = _TestUtil.nextInt(random(), 1, 100); + + // Flat facet field: + requests.add(new CountFacetRequest(new CategoryPath("facet"), numFacets)); + + // Hierarchical field: + int depth = random().nextInt(2); + byte[] hier = new byte[depth]; + String[] hierLabels = new String[1+depth]; + hierLabels[0] = "facet2"; + StringBuilder sb = new StringBuilder(); + sb.append("facet2"); + for(int i=0;i results = c.getFacetResults(); + assertEquals(2, results.size()); + + // Slow facet method: + Map counts = new HashMap(); + Map counts2 = new HashMap(); + + int totHitCount = 0; + int hierTopCount = 0; + + for(int doc=0;doc EnumFacetsAccumulator.ramBytesUsed()); + r.close(); + w.close(); + dir.close(); + } +} Property changes on: lucene/facet/src/test/org/apache/lucene/facet/search/TestEnumFacetsAccumulator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java (revision 1538103) +++ lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java (working copy) @@ -12,6 +12,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.facet.params.CategoryListParams; import org.apache.lucene.facet.params.FacetIndexingParams; @@ -191,4 +192,18 @@ } } + /** Indexes fields for faceting using {@link + * EnumFacetsAccumulator} + * + * @lucene.experimental */ + public void addEnumFields(Document doc, Iterable categories) throws IOException { + // nocommit this is effectively NO_PARENTS indexing; + // should we also allow ALL_BUT_DIM? then multi-valued + // hierarchy could count correctly: + for(CategoryPath cp : categories) { + // nocommit don't hardwire this field name; put it + // somewhere standard / allow FIP to change it?: + doc.add(new StringField("$enumfacets", cp.toString(indexingParams.getFacetDelimChar()), Field.Store.NO)); + } + } } Index: lucene/facet/src/java/org/apache/lucene/facet/search/EnumFacetsAccumulator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/EnumFacetsAccumulator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/EnumFacetsAccumulator.java (working copy) @@ -0,0 +1,512 @@ +package org.apache.lucene.facet.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.Set; +import java.util.WeakHashMap; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.lucene.facet.params.FacetIndexingParams; +import org.apache.lucene.facet.params.FacetSearchParams; +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.PriorityQueue; +import org.apache.lucene.util.RamUsageEstimator.ObjectFilter; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.StringHelper; + +// nocommit should we assign global ords, in RAM, across +// segs? then counting across segs can be int[] (no +// hashmap lookup) + +// nocommit need a "getDrillDownFilter" method + +// nocommit test w/ drill sideways + +// TODO: can we factor out / share with / poach from Solr's +// impl? + +// TODO: more efficient RAM structures? since we pre-load +// entire field we can pack somehow... + +// nocommit should we "do negative" when number of hits is > maxDoc/2? + +/** Each CategoryPath should be length 1, which counts all + * terms in that field, or length 2, which counts all terms + * matching the prefix of the 2nd component. + * + *

+ * <p> Only use this for dims that have a smallish number of
+ * facet labels to be counted.
+ *
+ * <p> This loads bitsets or int[] into heap for every visited term
+ * of every field you facet on!  Typically this is a good
+ * tradeoff (doesn't use much RAM and greatly speeds up the
+ * facet counting), as long as your field is low cardinality.
+ *
+ * <p> You should index your fields "normally", typically
+ * DOCS_ONLY e.g. using {@link StringField}.
+ *
+ * <p>
This method acts like NO_PARENTS; this means it cannot + * handle multi-valued hierarchical fields, or + * multi-valued flat fields where you care about the root + * dimension count (how many fields had at least one facet + * label). + * + * @lucene.experimental */ +public class EnumFacetsAccumulator extends FacetsAccumulator { + + // Maps reader cache key -> sorted array of term+bitset (OneTerm) + private static final Map readerBits = new WeakHashMap(); + private final byte delimByte; + + public EnumFacetsAccumulator(FacetSearchParams fsp) { + super(fsp); + + char delim = searchParams.indexingParams.getFacetDelimChar(); + if (delim > 127) { + throw new IllegalStateException("the delim char must encode to a single byte in UTF-8 (got: 0x" + Integer.toHexString(delim) + ")"); + } + delimByte = (byte) delim; + } + + public static long ramBytesUsed() { + synchronized (readerBits) { + return RamUsageEstimator.sizeOf( + readerBits, + new ObjectFilter() { + @Override + public boolean accept(Object o) { + // Don't count RAM used by the segment + // core cache key: + return o.getClass().getName().equals("org.apache.lucene.index.SegmentCoreReaders") == false; + } + }); + } + } + + /** Clears term bitsets to free heap */ + public static void clear() { + synchronized (readerBits) { + readerBits.clear(); + } + } + + private static class OneTerm implements Comparable { + // nocommit byte[] term instead? + public final BytesRef term; + + // int[] (for "sparse" case) or FixedBitSet + public final Object bits; + + // Depth of this hierarchy + public final int depth; + + // Parent term: + public final OneTerm parent; + + public OneTerm(OneTerm parent, BytesRef term, Object bits, int depth) { + this.parent = parent; + this.term = term; + this.bits = bits; + this.depth = depth; + } + + @Override + public int compareTo(OneTerm other) { + return this.term.compareTo(other.term); + } + + @Override + public boolean equals(Object other) { + return other instanceof OneTerm && ((OneTerm) other).term.equals(term); + } + + @Override + public int hashCode() { + return term.hashCode(); + } + } + + private static final OneTerm ROOT_TERM = new OneTerm(null, null, null, 0); + + private OneTerm[] getReaderBits(AtomicReader reader) throws IOException { + + Object key = reader.getCoreCacheKey(); + + synchronized(readerBits) { + OneTerm[] result = readerBits.get(key); + if (result != null) { + // Already previously loaded + return result; + } + + // nocommit don't hard-code field name: + Terms terms = reader.fields().terms("$enumfacets"); + if (terms == null) { + return null; + } + + if (terms.size() > ArrayUtil.MAX_ARRAY_LENGTH) { + throw new IllegalStateException("too many terms (" + terms.size() + " > " + ArrayUtil.MAX_ARRAY_LENGTH + " reader=" + reader); + } + if (terms.size() == -1) { + throw new IllegalStateException("codec must implement terms.size() method (got: -1)"); + } + + // Reconstructs the parent chains for all OneTerm + // instances: + TreeBuilder treeBuilder = new TreeBuilder(delimByte); + result = new OneTerm[(int) terms.size()]; + readerBits.put(key, result); + + int maxDoc = reader.maxDoc(); + + DocsEnum docsEnum = null; + + TermsEnum termsEnum = terms.iterator(null); + + // Load all terms in this field: + int termCount = 0; + + while (true) { + BytesRef term = termsEnum.next(); + if (term == null) { + break; + } + + // We must pass null for liveDocs because this entry + // can be shared to "older" readers that have fewer + // deleted docs than the current one: + docsEnum = termsEnum.docs(null, docsEnum, 0); 
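+      // Choose a per-term docID representation by frequency: a term matching
+      // more than maxDoc/32 docs gets a dense FixedBitSet, while rarer terms
+      // get a sorted int[] of docIDs, which is much cheaper in RAM.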
+ + int docFreq = termsEnum.docFreq(); + + Object bits; + + if (docFreq > maxDoc/32) { + // High freq term; use non-sparse bits: + FixedBitSet fixedBits = new FixedBitSet(maxDoc); + int docID; + while ((docID = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) { + fixedBits.set(docID); + } + bits = fixedBits; + } else { + // nocommit use compressed bitsets (WAH8DocIdSet) if docFreq + // isn't that low...? + // Low freq term; use sparse representation: + int[] docIDs = new int[docFreq]; + for(int i=0;i stack = new ArrayList(); + private final byte delimByte; + + public TreeBuilder(byte delimByte) { + this.delimByte = delimByte; + stack.add(ROOT_TERM); + } + + private void pushPop(BytesRef term, int length, int depth, OneTerm newTerm) { + + // Pop current stack that doesn't match the new term: + if (stack.size() > depth) { + OneTerm curTerm = stack.get(depth); + if (curTerm.term.length != length || StringHelper.startsWith(term, curTerm.term) == false) { + while (stack.size() > depth) { + stack.remove(stack.size()-1); + } + } + } + + // Push: + if (stack.size() == depth) { + if (newTerm == null) { + // Must fill in a place-holder term when no docs + // had the "inner node"; this term never appears + // in the readerBits, only in the + // parent/grandparent/etc.: + newTerm = new OneTerm(stack.get(depth-1), + new BytesRef(term.bytes, 0, length), + null, + depth); + } + stack.add(newTerm); + } + + depth++; + } + + private OneTerm add(BytesRef _term, Object bits) { + BytesRef term = BytesRef.deepCopyOf(_term); + int depth = 1; + for(int i=0;i accumulate(List matchingDocs) throws IOException { + + List results = new ArrayList(); + for(FacetRequest request : searchParams.facetRequests) { + + int matchDepth = request.categoryPath.length+1; + + BytesRef prefix = new BytesRef(request.categoryPath.toString(searchParams.indexingParams.getFacetDelimChar())); + + // Holds facet counts for this request: + Map counts = new HashMap(); + + for(MatchingDocs docs : matchingDocs) { + AtomicReader reader = docs.context.reader(); + + OneTerm[] termBits = getReaderBits(reader); + if (termBits == null) { + // This reader doesn't have this field + continue; + } + + // TODO: how come the MatchingDocs.bits is never a sparse + // bitset (i.e. backed by sorted int[] docIDs)? We + // could factor out SortedIntDocSet from Solr ... 
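+        // Count this segment's hits against every indexed term under the
+        // request's prefix; after all segments are visited, the per-term
+        // counts are rolled up to matchDepth and the top-N labels are
+        // selected below.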
+ + countOneField(matchDepth, termBits, counts, prefix, reader.maxDoc(), docs.bits, docs.totalHits); + } + + // Pull top labels: + PriorityQueue> queue = new PriorityQueue>(request.numResults, false) { + @Override + protected boolean lessThan(Map.Entry a, Map.Entry b) { + // Sort first by facet count, descending + int cmp = a.getValue().intValue() - b.getValue().intValue(); + if (cmp != 0) { + return cmp < 0; + } + + // Then tie break by facet label, ascending + return b.getKey().term.compareTo(a.getKey().term) < 0; + } + }; + + // Rollup + Map counts2 = new HashMap(); + //System.out.println("rollup:"); + for(Map.Entry ent : counts.entrySet()) { + OneTerm oneTerm = ent.getKey(); + //System.out.println(" term=" + oneTerm.term.utf8ToString()); + while (oneTerm.depth > matchDepth) { + oneTerm = oneTerm.parent; + } + //System.out.println(" after=" + oneTerm.term.utf8ToString()); + Integer curCount = counts2.get(oneTerm); + if (curCount == null) { + counts2.put(oneTerm, ent.getValue()); + } else { + counts2.put(oneTerm, curCount.intValue() + ent.getValue().intValue()); + } + } + + int totCount = 0; + int topCount = 0; + for(Map.Entry ent : counts2.entrySet()) { + totCount += ent.getValue().intValue(); + // If there were any docs in the exact category, we + // don't include them in the queue but do add to + // totCount: + if (ent.getKey().depth == matchDepth) { + queue.insertWithOverflow(ent); + } + } + + FacetResultNode[] subResults = new FacetResultNode[queue.size()]; + while(queue.size() > 0) { + int spot = queue.size()-1; + Map.Entry ent = queue.pop(); + FacetResultNode node = new FacetResultNode(-1, ent.getValue().intValue()); + OneTerm key = ent.getKey(); + CategoryPath label = new CategoryPath(key.term.utf8ToString(), searchParams.indexingParams.getFacetDelimChar()); + assert key.depth == matchDepth: "got " + key.depth + " vs " + matchDepth; + node.label = label; + subResults[spot] = node; + } + FacetResultNode rootNode = new FacetResultNode(-1, totCount); + rootNode.label = request.categoryPath; + rootNode.subResults = Arrays.asList(subResults); + results.add(new FacetResult(request, rootNode, counts.size())); + } + + return results; + } + + private boolean prefixMatches(OneTerm oneTerm, BytesRef prefix) { + if (StringHelper.startsWith(oneTerm.term, prefix) == false) { + return false; + } + if (oneTerm.term.length > prefix.length && oneTerm.term.bytes[prefix.length] != delimByte) { + return false; + } + + return true; + } + + /** Iterate through the terms in one field, intersecting + * the bitsets to accumulate the counts */ + private void countOneField(int matchDepth, OneTerm[] termBits, Map counts, BytesRef prefix, + int maxDoc, FixedBitSet hits, int totalHits) throws IOException { + + assert totalHits == hits.cardinality(); + + int idx; + + if (prefix != null) { + OneTerm prefixTerm = new OneTerm(null, prefix, null, 0); + idx = Arrays.binarySearch(termBits, prefixTerm); + if (idx < 0) { + idx = -idx-1; + } + } else { + idx = 0; + } + + //System.out.println("count prefix=" + (prefix == null ? 
"null" : prefix.utf8ToString())); + + while (idx < termBits.length) { + OneTerm oneTerm = termBits[idx]; + //System.out.println(" next term=" + oneTerm.term.utf8ToString() + " depth=" + oneTerm.depth + " vs " + matchDepth); + if (prefix != null && prefixMatches(oneTerm, prefix) == false) { + //System.out.println(" break"); + break; + } + + // Intersect this query's hits w/ the docs that + // have this term: + int count = 0; + if (oneTerm.bits instanceof int[]) { + // Term bits is sparse + int[] termDocs = (int[]) oneTerm.bits; + + // TODO: tune this crossover point: + if (totalHits < termDocs.length/50) { + // Query's hits are sparse relative to the term: + int docID = 0; + int nextIDX = 0; + while (docID < maxDoc && (docID = hits.nextSetBit(docID)) != -1 && nextIDX < termDocs.length) { + // TODO: make this smarter / factor out + // Solr's SortedIntDocSet.intersectionSize: + int loc = Arrays.binarySearch(termDocs, nextIDX, termDocs.length, docID); + if (loc >= 0) { + count++; + nextIDX = loc+1; + } else { + nextIDX = -loc-1; + } + + docID++; + } + + } else { + int[] docIDs = (int[]) oneTerm.bits; + for(int i=0;i */ public static long sizeOf(Object obj) { - return measureObjectSize(obj); + return sizeOf(obj, null); } + /** Optionally pass an instance of this to {@link + * #sizeof(Object,ObjectFilter)} to restrict which + * objects are visited. */ + public interface ObjectFilter { + /** Return true if the size of this Object should be + * visited. If you return false than neither this + * object nor any objects only it references will be + * visited. */ + public boolean accept(Object obj); + } + + // nocommit javadocs + public static long sizeOf(Object obj, ObjectFilter filter) { + return measureObjectSize(obj, filter); + } + /** * Estimates a "shallow" memory usage of the given object. For arrays, this will be the * memory taken by array storage (no subreferences will be followed). For objects, this @@ -401,7 +417,7 @@ * or complex graphs (a max. recursion depth on my machine was ~5000 objects linked in a chain * so not too much). */ - private static long measureObjectSize(Object root) { + private static long measureObjectSize(Object root, ObjectFilter filter) { // Objects seen so far. final IdentityHashSet seen = new IdentityHashSet(); // Class cache with reference Field and precalculated shallow size. @@ -414,6 +430,10 @@ while (!stack.isEmpty()) { final Object ob = stack.remove(stack.size() - 1); + if (filter != null && filter.accept(ob) == false) { + continue; + } + if (ob == null || seen.contains(ob)) { continue; }