+ * The row may only contain the path, if a path is available. It may also + * (or just) contain so-called "pseudo-properties" such as "jcr:score" and + * "rep:excerpt", in case the index supports those properties and if the + * properties were requested when running the query. The query engine will + * indicate that those pseudo properties were requested by setting an + * appropriate (possibly unrestricted) filter condition. + *
+ * The index should return a row with those properties that are stored in + * the index itself, so that the query engine doesn't have to load the whole + * row / node unnecessarily (avoiding to load the whole row is sometimes + * called "index only scan"), specially for rows that are anyway skipped. If + * the index does not have an (efficient) way to return some (or any) of the + * properties, it doesn't have to provide those values. In this case, the + * query engine will load the node itself if required. If all conditions + * match, the query engine will sometimes load the node to do access checks, + * but this is not always the case, and it is not the case if any of the + * (join) conditions do not match. + * + * @return the row + */ + @Override + IndexRow next(); + + /** + * Get the size if known. + * + * @param precision the required precision + * @param max the maximum nodes read (for an exact size) + * @return the size, or -1 if unknown + */ + long getSize(SizePrecision precision, long max); + +} Property changes on: oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/Cursor.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +Author Date Id Revision Rev URL \ No newline at end of property Index: oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/Filter.java =================================================================== --- oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/Filter.java (nonexistent) +++ oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/Filter.java (working copy) @@ -0,0 +1,525 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.spi.query; + +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import javax.jcr.PropertyType; + +import org.apache.jackrabbit.oak.api.PropertyValue; +import org.apache.jackrabbit.oak.commons.PathUtils; +import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression; + +/** + * The filter for an index lookup that contains a number of restrictions that + * are combined with AND. Possible restrictions are a property restriction, a + * path restriction, a node type restriction, and a fulltext restriction. + *
+ * A property restriction could be that the property must exist, or that the + * property value has to be within a certain range. + *
+ * A path restriction could be a restriction to a certain subtree, a parent of a
+ * certain path, or equality to a certain path.
+ */
+public interface Filter {
+
+ /**
+ * Get the list of property restrictions, if any. Each property may contain
+ * multiple restrictions, for example x=1 and x=2. For this case, only
+ * multi-valued properties match that contain both 1 and 2.
+ *
+ * @return the conditions (an empty collection if not used)
+ */
+ Collection
+ * The query engine will pick the index that returns the lowest cost for the
+ * given filter conditions.
+ *
+ * The index should only use that part of the filter that speeds up data lookup.
+ * All other filter conditions should be ignored and not evaluated within this
+ * index, because the query engine will in any case evaluate the condition (and
+ * join condition), so that evaluating the conditions within the index would
+ * actually slow down processing. For example, an index on the property
+ * "lastName" should not try to evaluate any other restrictions than those on
+ * the property "lastName", even if the query contains other restrictions. For
+ * the query "where lastName = 'x' and firstName = 'y'", the query engine will
+ * set two filter conditions, one for "lastName" and another for "firstName".
+ * The index on "lastName" should not evaluate the condition on "firstName",
+ * even though it will be set in the filter.
+ */
+public interface QueryIndex {
+
+ /**
+ * Returns the minimum cost which {@link #getCost(Filter, NodeState)} would return in the best possible case.
+ *
+ * The implementation should return a static/cached value because it is called very often.
+ *
+ * @return the minimum cost for the index
+ */
+ double getMinimumCost();
+
+ /**
+ * Estimate the worst-case cost to query with the given filter. The returned
+ * cost is a value between 1 (very fast; lookup of a unique node) and the
+ * estimated number of entries to traverse, if the cursor would be fully
+ * read, and if there could in theory be one network roundtrip or disk read
+ * operation per node (this method may return a lower number if the data is
+ * known to be fully in memory).
+ *
+ * The returned value is supposed to be an estimate and doesn't have to be
+ * very accurate. Please note this method is called on each index whenever a
+ * query is run, so the method should be reasonably fast (not read any data
+ * itself, or at least not read too much data).
+ *
+ * If an index implementation can not query the data, it has to return
+ * {@code Double.MAX_VALUE}.
+ *
+ * @param filter the filter
+ * @param rootState root state of the current repository snapshot
+ * @return the estimated cost in number of read nodes
+ */
+ double getCost(Filter filter, NodeState rootState);
+
+ /**
+ * Query the index. The returned cursor is supposed to return as few nodes
+ * as possible, but may return more nodes than necessary.
+ *
+ * An implementation should only filter the result if it can do so easily
+ * and efficiently; the query engine will verify the data again (in memory)
+ * and check for access rights.
+ *
+ * The method is only called if this index is used for the given query and
+ * selector, which is only the case if the given index implementation
+ * returned the lowest cost for the given filter. If the implementation
+ * returned {@code Double.MAX_VALUE} in the getCost method for the given
+ * filter, then this method is not called. If it is still called, then it is
+ * supposed to throw an exception (as it would be an internal error of the
+ * query engine).
+ *
+ * @param filter the filter
+ * @param rootState root state of the current repository snapshot
+ * @return a cursor to iterate over the result
+ */
+ Cursor query(Filter filter, NodeState rootState);
+
+ /**
+ * Get the query plan for the given filter. This method is called when
+ * running an {@code EXPLAIN SELECT} query, or for logging purposes. The
+ * result should be human readable.
+ *
+ * @param filter the filter
+ * @param rootState root state of the current repository snapshot
+ * @return the query plan
+ */
+ String getPlan(Filter filter, NodeState rootState);
+
+ /**
+ * Get the unique index name.
+ *
+ * @return the index name
+ */
+ String getIndexName();
+
+ /**
+ * A marker interface which means this index supports executing native
+ * queries. It declares no methods.
+ */
+ interface NativeQueryIndex {
+ // a marker interface
+ }
+
+ /**
+ * An interface which means this index may support more than just the
+ * minimal fulltext query syntax. If this index is used, then the
+ * query engine does not verify the fulltext constraint(s) for the given
+ * selector.
+ */
+ interface FulltextQueryIndex extends QueryIndex, NativeQueryIndex {
+
+ /**
+ * Returns the NodeAggregator responsible for providing the aggregation
+ * settings, or null if aggregation is not available/desired.
+ *
+ * @return the node aggregator, or null if there is none
+ */
+ @CheckForNull
+ NodeAggregator getNodeAggregator();
+
+ }
+
+ /**
+ * Combines {@link FulltextQueryIndex} and {@link AdvancedQueryIndex} in a
+ * single type. It declares no methods of its own.
+ */
+ interface AdvanceFulltextQueryIndex extends FulltextQueryIndex, AdvancedQueryIndex {
+ // a marker interface
+ }
+
+ /**
+ * A query index that may support using multiple access orders
+ * (returning the rows in a specific order), and that can provide detailed
+ * information about the cost.
+ */
+ interface AdvancedQueryIndex {
+
+ /**
+ * Return the possible index plans for the given filter and sort order.
+ * Please note this method is supposed to run quickly. That means it
+ * should usually not read any data from the storage.
+ *
+ * @param filter the filter
+ * @param sortOrder the sort order or null if no sorting is required
+ * @param rootState root state of the current repository snapshot
+ * @return the list of index plans (null if none)
+ */
+ List
+ * The index plan is one of the plans that the index returned in the
+ * getPlans call.
+ *
+ * @param plan the index plan
+ * @param root root state of the current repository snapshot
+ * @return the query plan description
+ */
+ String getPlanDescription(IndexPlan plan, NodeState root);
+
+ /**
+ * Start a query. The filter and sort order of the index plan is to be
+ * used.
+ *
+ * The index plan is one of the plans that the index returned in the
+ * getPlans call.
+ *
+ * @param plan the index plan to use
+ * @param rootState root state of the current repository snapshot
+ * @return a cursor to iterate over the result
+ */
+ Cursor query(IndexPlan plan, NodeState rootState);
+
+ }
+
+ /**
+ * An index plan.
+ */
+ @ProviderType
+ interface IndexPlan extends Cloneable{
+
+ /**
+ * The cost to execute the query once. The returned value should
+ * approximately match the number of disk read operations plus the
+ * number of network roundtrips (worst case).
+ *
+ * @return the cost per execution, in estimated number of I/O operations
+ */
+ double getCostPerExecution();
+
+ /**
+ * The cost to read one entry from the cursor. The returned value should
+ * approximately match the number of disk read operations plus the
+ * number of network roundtrips (worst case).
+ *
+ * @return the lookup cost per entry, in estimated number of I/O operations
+ */
+ double getCostPerEntry();
+
+ /**
+ * The estimated number of entries in the cursor that is returned by the query method,
+ * when using this plan. This value does not have to be accurate.
+ *
+ * @return the estimated number of entries
+ */
+ long getEstimatedEntryCount();
+
+ /**
+ * The filter to use.
+ *
+ * @return the filter
+ */
+ Filter getFilter();
+
+ /**
+ * Use the given filter.
+ */
+ void setFilter(Filter filter);
+
+ /**
+ * Whether the index is not always up-to-date.
+ *
+ * @return whether the index might be updated asynchronously
+ */
+ boolean isDelayed();
+
+ /**
+ * Whether the fulltext part of the filter is evaluated (possibly with
+ * an extended syntax). If set, the fulltext part of the filter is not
+ * evaluated any more within the query engine.
+ *
+ * @return whether the index supports full-text extraction
+ */
+ boolean isFulltextIndex();
+
+ /**
+ * Whether the cursor is able to read all properties from a node.
+ * If yes, then the query engine will not have to read the data itself.
+ *
+ * @return whether node data is returned
+ */
+ boolean includesNodeData();
+
+ /**
+ * The sort order of the returned entries, or null if unsorted.
+ *
+ * @return the sort order
+ */
+ Listtrue if path can be read by the calling user, false otherwise.
+ */
+ boolean isAccessible(String path);
+
+ /**
+ * Whether the filter contains a native condition.
+ *
+ * @return true if it does
+ */
+ boolean containsNativeConstraint();
+
+ /**
+ * Get the most restrictive property restriction for the given property, if
+ * any.
+ *
+ * @param propertyName the property name
+ * @return the first restriction, or null if there is no restriction for
+ * this property
+ */
+ PropertyRestriction getPropertyRestriction(String propertyName);
+
+ /**
+ * Get all property restrictions for the given property.
+ *
+ * @param propertyName the property name
+ * @return the list of restrictions (possibly empty, never null)
+ */
+ Listnull if
+ * this index plan isn't based on a property restriction. E.g. a plan
+ * based on an order by clause in the query.
+ *
+ * @return the restriction this plan is based on or null.
+ */
+ @CheckForNull
+ PropertyRestriction getPropertyRestriction();
+
+ /**
+ * Creates a cloned copy of current plan. Mostly used when the filter needs to be
+ * modified for a given call
+ *
+ * @return clone of current plan
+ */
+ IndexPlan copy();
+
+ /**
+ * Returns the value of the named attribute as an Object,
+ * or null if no attribute of the given name exists.
+ *
+ * @param name String specifying the name of
+ * the attribute
+ *
+ * @return an Object containing the value
+ * of the attribute, or null if the attribute does not exist
+ */
+ @CheckForNull
+ Object getAttribute(String name);
+
+ /**
+ * Get the unique plan name.
+ *
+ * @return the plan name
+ */
+ @CheckForNull
+ String getPlanName();
+
+ /**
+ * A builder for index plans.
+ */
+ class Builder {
+
+ protected double costPerExecution = 1.0;
+ protected double costPerEntry = 1.0;
+ protected long estimatedEntryCount = 1000000;
+ protected Filter filter;
+ protected boolean isDelayed;
+ protected boolean isFulltextIndex;
+ protected boolean includesNodeData;
+ protected List
+ * FullTextSearchLiteral ::= Disjunct {' OR ' Disjunct}
+ * Disjunct ::= Term {' ' Term}
+ * Term ::= ['-'] SimpleTerm
+ * SimpleTerm ::= Word | '"' Word {' ' Word} '"'
+ *
+ */
+public class FullTextParser {
+
+ /**
+ * Compatibility for Jackrabbit 2.0 single quoted phrase queries.
+ * (contains(., "word ''hello world'' word")
+ * These are queries that delimit a phrase with single quotes
+ * instead of using double quotes, as required by the spec.
+ */
+ private static final boolean JACKRABBIT_2_SINGLE_QUOTED_PHRASE = true;
+
+ private String propertyName;
+ private String text;
+ private int parseIndex;
+
+ /**
+  * Parses the given text using a newly created parser instance.
+  *
+  * @param propertyName the property name that is stored on the parser
+  * @param text the text to parse
+  * @return the expression returned by {@code parseOr()}
+  * @throws ParseException if {@code parseOr()} throws it
+  */
+ public static FullTextExpression parse(String propertyName, String text) throws ParseException {
+     FullTextParser parser = new FullTextParser();
+     parser.text = text;
+     parser.propertyName = propertyName;
+     return parser.parseOr();
+ }
+
+ FullTextExpression parseOr() throws ParseException {
+ ArrayList