Index: src/java/org/apache/lucene/search/CachingWrapperFilter.java
===================================================================
--- src/java/org/apache/lucene/search/CachingWrapperFilter.java (revision 525597)
+++ src/java/org/apache/lucene/search/CachingWrapperFilter.java (working copy)
@@ -30,13 +30,13 @@
  * caching, keeping the two concerns decoupled yet composable.
  */
 public class CachingWrapperFilter extends Filter {
-  private Filter filter;
+  protected Filter filter;
 
   /**
    * @todo What about serialization in RemoteSearchable? Caching won't work.
    *       Should transient be removed?
    */
-  private transient Map cache;
+  protected transient Map cache;
 
   /**
    * @param filter Filter to cache results of
Index: src/java/org/apache/lucene/search/FilterManager.java
===================================================================
--- src/java/org/apache/lucene/search/FilterManager.java (revision 0)
+++ src/java/org/apache/lucene/search/FilterManager.java (revision 0)
@@ -0,0 +1,205 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Comparator;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeSet;
+
+/**
+ * Filter caching singleton. It can be used by {@link org.apache.lucene.search.RemoteCachingWrapperFilter}
+ * or just to save filters locally for reuse.
+ * This class makes it possible to cache Filters even when using RMI, as it
+ * keeps the cache on the searcher side of the RMI connection.
+ *
+ * It can also be used as persistent storage for any filter as long as the
+ * filter provides a proper hashCode(), as that is used as the key in the cache.
+ *
+ * The cache is periodically cleaned up from a separate thread to ensure the
+ * cache doesn't exceed the maximum size.
+ * @author Matt Ericson
+ */
+public class FilterManager {
+
+  protected static FilterManager manager;
+
+  /** The default maximum number of Filters in the cache */
+  protected static final int DEFAULT_CACHE_CLEAN_SIZE = 100;
+  /** The default frequency of cache cleanup */
+  protected static final long DEFAULT_CACHE_SLEEP_TIME = 1000 * 60 * 10;
+
+  /** The cache itself */
+  protected Map cache;
+  /** Maximum allowed cache size */
+  protected int cacheCleanSize;
+  /** Cache cleaning frequency */
+  protected long cleanSleepTime;
+  /** Cache cleaner that runs in a separate thread */
+  protected FilterCleaner filterCleaner;
+
+  public synchronized static FilterManager getInstance() {
+    if (manager == null) {
+      manager = new FilterManager();
+    }
+    return manager;
+  }
+
+  /**
+   * Sets up the FilterManager singleton.
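+   * Callers are expected to go through getInstance() rather than construct this directly;
+   * the constructor just initializes the cache and starts the daemon cleanup thread.
+   * Illustrative call pattern (myFilter is a placeholder, not something defined here):
+   *   Filter cached = FilterManager.getInstance().getFilter(myFilter);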
+   */
+  protected FilterManager() {
+    cache = new HashMap();
+    cacheCleanSize = DEFAULT_CACHE_CLEAN_SIZE; // Let the cache get to 100 items
+    cleanSleepTime = DEFAULT_CACHE_SLEEP_TIME; // 10 minutes between cleanings
+
+    filterCleaner = new FilterCleaner();
+    Thread fcThread = new Thread(filterCleaner);
+    // set to be a Daemon so it doesn't have to be stopped
+    fcThread.setDaemon(true);
+    fcThread.start();
+  }
+
+  /**
+   * Sets the max size that the cache should reach before it is cleaned up
+   * @param cacheCleanSize maximum allowed cache size
+   */
+  public void setCacheSize(int cacheCleanSize) {
+    this.cacheCleanSize = cacheCleanSize;
+  }
+
+  /**
+   * Sets the cache cleaning frequency in milliseconds.
+   * @param cleanSleepTime cleaning frequency in milliseconds
+   */
+  public void setCleanThreadSleepTime(long cleanSleepTime) {
+    this.cleanSleepTime = cleanSleepTime;
+  }
+
+  /**
+   * Returns the cached version of the filter. Allows the caller to pass up
+   * a small filter but this will keep a persistent version around and allow
+   * the caching filter to do its job.
+   *
+   * @param filter The input filter
+   * @return The cached version of the filter
+   */
+  public Filter getFilter(Filter filter) {
+    synchronized(cache) {
+      FilterItem fi = null;
+      fi = (FilterItem)cache.get(new Integer(filter.hashCode()));
+      if (fi != null) {
+        fi.timestamp = new Date().getTime();
+        return fi.filter;
+      }
+      cache.put(new Integer(filter.hashCode()), new FilterItem(filter));
+      return filter;
+    }
+  }
+
+  /**
+   * Holds the filter and the last time the filter was used, to make LRU-based
+   * cache cleaning possible.
+   * TODO: Clean this up when we switch to Java 1.5
+   */
+  protected class FilterItem {
+    public Filter filter;
+    public long timestamp;
+
+    public FilterItem (Filter filter) {
+      this.filter = filter;
+      this.timestamp = new Date().getTime();
+    }
+  }
+
+
+  /**
+   * Keeps the cache from getting too big.
+   * If we were using Java 1.5, we could use LinkedHashMap and we would not need this thread
+   * to clean out the cache.
+   *
+   * The SortedSet sortedFilterItems is used only to sort the items from the cache,
+   * so when it's time to clean up we have the TreeSet sort the FilterItems by
+   * timestamp.
+   *
+   * Removes 1.5 times the number of excess items to make the cache smaller.
+   * For example:
+   * If the cache clean size is 10 and the cache is at 15, we remove (15 - 10) * 1.5 = 7.5,
+   * which the (int) cast truncates to 7 entries.
+   * This way we clean the cache a bit more than strictly needed, and avoid having the
+   * cache cleaner run too frequently.
+ */ + protected class FilterCleaner implements Runnable { + + private boolean running = true; + private TreeSet sortedFilterItems; + + public FilterCleaner() { + sortedFilterItems = new TreeSet(new Comparator() { + public int compare(Object a, Object b) { + if( a instanceof Map.Entry && b instanceof Map.Entry) { + FilterItem fia = (FilterItem) ((Map.Entry)a).getValue(); + FilterItem fib = (FilterItem) ((Map.Entry)b).getValue(); + if ( fia.timestamp == fib.timestamp ) { + return 0; + } + // smaller timestamp first + if ( fia.timestamp < fib.timestamp ) { + return -1; + } + // larger timestamp last + return 1; + } else { + throw new ClassCastException("Objects are not Map.Entry"); + } + } + }); + } + + public void run () { + while (running) { + + // sort items from oldest to newest + // we delete the oldest filters + if (cache.size() > cacheCleanSize) { + // empty the temporary set + sortedFilterItems.clear(); + synchronized (cache) { + sortedFilterItems.addAll(cache.entrySet()); + Iterator it = sortedFilterItems.iterator(); + int numToDelete = (int) ((cache.size() - cacheCleanSize) * 1.5); + int counter = 0; + // loop over the set and delete all of the cache entries not used in a while + while (it.hasNext() && counter++ < numToDelete) { + Map.Entry entry = (Map.Entry)it.next(); + cache.remove(entry.getKey()); + } + } + // empty the set so we don't tie up the memory + sortedFilterItems.clear(); + } + // take a nap + try { + Thread.sleep(cleanSleepTime); + } catch (InterruptedException e) { + // just keep going + } + } + } + } +} Index: src/java/org/apache/lucene/search/RemoteCachingWrapperFilter.java =================================================================== --- src/java/org/apache/lucene/search/RemoteCachingWrapperFilter.java (revision 0) +++ src/java/org/apache/lucene/search/RemoteCachingWrapperFilter.java (revision 0) @@ -0,0 +1,58 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.BitSet; + +import org.apache.lucene.index.IndexReader; + +/** + * Provides caching of {@link Filter}s themselves on the remote end of an RMI connection. + * The cache is keyed on Filter's hashCode(), so if it sees the same filter twice + * it will reuse the original version. + *
+ * NOTE: This does NOT cache the Filter bits, but rather the Filter itself.
+ * Thus, this works hand-in-hand with {@link CachingWrapperFilter} to keep both
+ * the cached Filter and the Filter bits on the remote end, close to the searcher.
+ *
+ * Usage:
+ *
+ * To cache a result you must do something like:
+ *   RemoteCachingWrapperFilter f = new RemoteCachingWrapperFilter(new CachingWrapperFilter(myFilter));
+ *
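+ * A slightly fuller sketch (illustrative only; remoteSearcher stands for any Searcher
+ * built over a RemoteSearchable and is not part of this class, and query is a placeholder):
+ *   Filter f = new RemoteCachingWrapperFilter(new CachingWrapperFilter(new QueryFilter(query)));
+ *   Hits hits = remoteSearcher.search(new TermQuery(new Term("test", "test")), f);
+ * Repeated searches with an equal filter then reuse the Filter instance that
+ * FilterManager cached on the searcher side, together with its cached bits.
+ *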
+ * @author Matt Ericson + */ +public class RemoteCachingWrapperFilter extends Filter { + protected Filter filter; + + public RemoteCachingWrapperFilter(Filter filter) { + this.filter = filter; + } + + /** + * Uses the {@link FilterManager} to keep the cache for a filter on the + * searcher side of a remote connection. + * @param reader the index reader for the Filter + * @return the bitset + */ + public BitSet bits(IndexReader reader) throws IOException { + Filter cachedFilter = FilterManager.getInstance().getFilter(filter); + return cachedFilter.bits(reader); + } +} Index: src/test/org/apache/lucene/search/TestRemoteCachingWrapperFilter.java =================================================================== --- src/test/org/apache/lucene/search/TestRemoteCachingWrapperFilter.java (revision 0) +++ src/test/org/apache/lucene/search/TestRemoteCachingWrapperFilter.java (revision 0) @@ -0,0 +1,127 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.rmi.Naming; +import java.rmi.registry.LocateRegistry; + +import junit.framework.TestCase; + +import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.RAMDirectory; + +/** + * Tests that the index is cached on the searcher side of things. + * NOTE: This is copied from TestRemoteSearchable since it already had a remote index set up. 
+ * @author Matt Ericson + */ +public class TestRemoteCachingWrapperFilter extends TestCase { + public TestRemoteCachingWrapperFilter(String name) { + super(name); + } + + private static Searchable getRemote() throws Exception { + try { + return lookupRemote(); + } catch (Throwable e) { + startServer(); + return lookupRemote(); + } + } + + private static Searchable lookupRemote() throws Exception { + return (Searchable)Naming.lookup("//localhost/Searchable"); + } + + private static void startServer() throws Exception { + // construct an index + RAMDirectory indexStore = new RAMDirectory(); + IndexWriter writer = new IndexWriter(indexStore,new SimpleAnalyzer(),true); + Document doc = new Document(); + doc.add(new Field("test", "test text", Field.Store.YES, Field.Index.TOKENIZED)); + doc.add(new Field("type", "A", Field.Store.YES, Field.Index.TOKENIZED)); + doc.add(new Field("other", "other test text", Field.Store.YES, Field.Index.TOKENIZED)); + writer.addDocument(doc); + //Need a second document to search for + doc = new Document(); + doc.add(new Field("test", "test text", Field.Store.YES, Field.Index.TOKENIZED)); + doc.add(new Field("type", "B", Field.Store.YES, Field.Index.TOKENIZED)); + doc.add(new Field("other", "other test text", Field.Store.YES, Field.Index.TOKENIZED)); + writer.addDocument(doc); + writer.optimize(); + writer.close(); + + // publish it + LocateRegistry.createRegistry(1099); + Searchable local = new IndexSearcher(indexStore); + RemoteSearchable impl = new RemoteSearchable(local); + Naming.rebind("//localhost/Searchable", impl); + } + + private static void search(Query query, Filter filter, int hitNumber, String typeValue) throws Exception { + Searchable[] searchables = { getRemote() }; + Searcher searcher = new MultiSearcher(searchables); + Hits result = searcher.search(query,filter); + assertEquals(1, result.length()); + Document document = result.doc(hitNumber); + assertTrue("document is null and it shouldn't be", document != null); + assertEquals(typeValue, document.get("type")); + assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 3, document.getFields().size() == 3); + } + + + public void testTermRemoteFilter() throws Exception { + CachingWrapperFilterHelper cwfh = new CachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "a")))); + + // This is what we are fixing - if one uses a CachingWrapperFilter(Helper) it will never + // cache the filter on the remote site + cwfh.setShouldHaveCache(false); + search(new TermQuery(new Term("test", "test")), cwfh, 0, "A"); + cwfh.setShouldHaveCache(false); + search(new TermQuery(new Term("test", "test")), cwfh, 0, "A"); + + // This is how we fix caching - we wrap a Filter in the RemoteCachingWrapperFilter(Handler - for testing) + // to cache the Filter on the searcher (remote) side + RemoteCachingWrapperFilterHelper rcwfh = new RemoteCachingWrapperFilterHelper(cwfh, false); + search(new TermQuery(new Term("test", "test")), rcwfh, 0, "A"); + + // 2nd time we do the search, we should be using the cached Filter + rcwfh.shouldHaveCache(true); + search(new TermQuery(new Term("test", "test")), rcwfh, 0, "A"); + + // assert that we get the same cached Filter, even if we create a new instance of RemoteCachingWrapperFilter(Helper) + // this should pass because the Filter parameters are the same, and the cache uses Filter's hashCode() as cache keys, + // and Filters' hashCode() builds on Filter parameters, not the Filter instance itself + rcwfh = new 
RemoteCachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "a"))), false); + rcwfh.shouldHaveCache(false); + search(new TermQuery(new Term("test", "test")), rcwfh, 0, "A"); + + rcwfh = new RemoteCachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "a"))), false); + rcwfh.shouldHaveCache(true); + search(new TermQuery(new Term("test", "test")), rcwfh, 0, "A"); + + // assert that we get a non-cached version of the Filter because this is a new Query (type:b) + rcwfh = new RemoteCachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "b"))), false); + rcwfh.shouldHaveCache(false); + search(new TermQuery(new Term("type", "b")), rcwfh, 0, "B"); + } +} Index: src/test/org/apache/lucene/search/RemoteCachingWrapperFilterHelper.java =================================================================== --- src/test/org/apache/lucene/search/RemoteCachingWrapperFilterHelper.java (revision 0) +++ src/test/org/apache/lucene/search/RemoteCachingWrapperFilterHelper.java (revision 0) @@ -0,0 +1,60 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.BitSet; + +import junit.framework.TestCase; + +import org.apache.lucene.index.IndexReader; + +/** + * A unit test helper class to help with RemoteCachingWrapperFilter testing and + * assert that it is working correctly. 
+ * @author Matt Ericson + */ +public class RemoteCachingWrapperFilterHelper extends RemoteCachingWrapperFilter { + + private boolean shouldHaveCache; + + public RemoteCachingWrapperFilterHelper(Filter filter, boolean shouldHaveCache) { + super(filter); + this.shouldHaveCache = shouldHaveCache; + } + + public void shouldHaveCache(boolean shouldHaveCache) { + this.shouldHaveCache = shouldHaveCache; + } + + public BitSet bits(IndexReader reader) throws IOException { + Filter cachedFilter = FilterManager.getInstance().getFilter(filter); + + TestCase.assertNotNull("Filter should not be null", cachedFilter); + if (!shouldHaveCache) { + TestCase.assertSame("First time filter should be the same ", filter, cachedFilter); + } else { + TestCase.assertNotSame("We should have a cached version of the filter", filter ,cachedFilter); + } + + if (filter instanceof CachingWrapperFilterHelper) { + ((CachingWrapperFilterHelper)cachedFilter).setShouldHaveCache(shouldHaveCache); + } + return cachedFilter.bits(reader); + } +} Index: src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java =================================================================== --- src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java (revision 0) +++ src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java (revision 0) @@ -0,0 +1,80 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.BitSet; +import java.util.WeakHashMap; + +import junit.framework.TestCase; + +import org.apache.lucene.index.IndexReader; + +/** + * A unit test helper class to test when the filter is getting cached and when it is not. 
+ */
+public class CachingWrapperFilterHelper extends CachingWrapperFilter {
+
+  private boolean shouldHaveCache = false;
+
+  /**
+   * @param filter Filter to cache results of
+   */
+  public CachingWrapperFilterHelper(Filter filter) {
+    super(filter);
+  }
+
+  public void setShouldHaveCache(boolean shouldHaveCache) {
+    this.shouldHaveCache = shouldHaveCache;
+  }
+
+  public BitSet bits(IndexReader reader) throws IOException {
+    if (cache == null) {
+      cache = new WeakHashMap();
+    }
+
+    synchronized (cache) { // check cache
+      BitSet cached = (BitSet) cache.get(reader);
+      if (shouldHaveCache) {
+        TestCase.assertNotNull("Cache should have data ", cached);
+      } else {
+        TestCase.assertNull("Cache should be null " + cached, cached);
+      }
+      if (cached != null) {
+        return cached;
+      }
+    }
+
+    final BitSet bits = filter.bits(reader);
+
+    synchronized (cache) { // update cache
+      cache.put(reader, bits);
+    }
+
+    return bits;
+  }
+
+  public String toString() {
+    return "CachingWrapperFilterHelper("+filter+")";
+  }
+
+  public boolean equals(Object o) {
+    if (!(o instanceof CachingWrapperFilterHelper)) return false;
+    return this.filter.equals(((CachingWrapperFilterHelper)o).filter);
+  }
+}
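
A minimal end-to-end sketch of how the pieces in this patch compose on the client side
(illustrative only: the class name RemoteFilterCachingExample is a placeholder, and the
bind name //localhost/Searchable simply mirrors what TestRemoteCachingWrapperFilter's
startServer() publishes; this is not part of the patch itself):

import java.rmi.Naming;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilterManager;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.MultiSearcher;
import org.apache.lucene.search.QueryFilter;
import org.apache.lucene.search.RemoteCachingWrapperFilter;
import org.apache.lucene.search.Searchable;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TermQuery;

public class RemoteFilterCachingExample {
  public static void main(String[] args) throws Exception {
    // Look up a RemoteSearchable that has already been published over RMI,
    // exactly as TestRemoteCachingWrapperFilter.startServer() does.
    Searchable remote = (Searchable) Naming.lookup("//localhost/Searchable");
    Searcher searcher = new MultiSearcher(new Searchable[] { remote });

    // CachingWrapperFilter caches the filter's BitSet per IndexReader;
    // RemoteCachingWrapperFilter asks FilterManager for a persistent Filter
    // instance on the searcher (remote) side, so that BitSet cache is actually
    // reused across remote calls instead of being rebuilt every time.
    Filter filter = new RemoteCachingWrapperFilter(
        new CachingWrapperFilter(new QueryFilter(new TermQuery(new Term("type", "a")))));

    Hits hits = searcher.search(new TermQuery(new Term("test", "test")), filter);
    System.out.println("hits: " + hits.length());

    // FilterManager can also be tuned, or used directly for local filter reuse.
    FilterManager.getInstance().setCacheSize(200);                      // clean once 200 filters are cached
    FilterManager.getInstance().setCleanThreadSleepTime(5 * 60 * 1000); // wake the cleaner every 5 minutes
  }
}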