diff --git data/scripts/q_test_init.sql data/scripts/q_test_init.sql
index 24eff64..5d36b6f 100644
--- data/scripts/q_test_init.sql
+++ data/scripts/q_test_init.sql
@@ -82,7 +82,7 @@ LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/bucketed_files/000000_0" INTO
 LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/bucketed_files/000001_0" INTO TABLE srcbucket_tmp;
 INSERT INTO srcbucket SELECT * FROM srcbucket_tmp;
 DROP TABLE srcbucket_tmp;
-
+
 ANALYZE TABLE srcbucket COMPUTE STATISTICS;
 ANALYZE TABLE srcbucket COMPUTE STATISTICS FOR COLUMNS key,value;
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
index a055b71..cd01094 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
@@ -41,6 +41,7 @@
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.TableScan;
 import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rel.type.RelDataTypeImpl;
 import org.apache.calcite.rex.RexBuilder;
@@ -339,11 +340,7 @@ private static RelNode createMaterializedViewScan(HiveConf conf, Table viewTable
     else {
       fullyQualifiedTabName = viewTable.getTableName();
     }
-    RelOptHiveTable optTable = new RelOptHiveTable(null, fullyQualifiedTabName,
-        rowType, viewTable, nonPartitionColumns, partitionColumns, new ArrayList(),
-        conf, new HashMap(),
-        new HashMap(), new AtomicInteger());
-    RelNode rel;
+    RelNode tableRel;

     // 3. Build operator
     if (obtainTableType(viewTable) == TableType.DRUID) {
@@ -354,8 +351,16 @@ private static RelNode createMaterializedViewScan(HiveConf conf, Table viewTable
       Set<String> metrics = new HashSet<>();
       List<RelDataType> druidColTypes = new ArrayList<>();
       List<String> druidColNames = new ArrayList<>();
+      //@NOTE this code is very similar to the code at org/apache/hadoop/hive/ql/parse/CalcitePlanner.java:2362
+      //@TODO it would be nice to refactor it
+      RelDataTypeFactory dtFactory = cluster.getRexBuilder().getTypeFactory();
       for (RelDataTypeField field : rowType.getFieldList()) {
-        druidColTypes.add(field.getType());
+        if (DruidTable.DEFAULT_TIMESTAMP_COLUMN.equals(field.getName())) {
+          // Druid's time column is always not null.
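+          // Typing it NOT NULL lets Calcite rewrite count(__time) as count(*) (see the new druid_timeseries.q cases).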
+          druidColTypes.add(dtFactory.createTypeWithNullability(field.getType(), false));
+        } else {
+          druidColTypes.add(field.getType());
+        }
         druidColNames.add(field.getName());
         if (field.getName().equals(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
           // timestamp
@@ -369,21 +374,28 @@ private static RelNode createMaterializedViewScan(HiveConf conf, Table viewTable
       }
       List<Interval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
+      rowType = dtFactory.createStructType(druidColTypes, druidColNames);
+      RelOptHiveTable optTable = new RelOptHiveTable(null, fullyQualifiedTabName,
+          rowType, viewTable, nonPartitionColumns, partitionColumns, new ArrayList<>(),
+          conf, new HashMap<>(), new HashMap<>(), new AtomicInteger());

-      final DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false),
+      DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false),
           dataSource, RelDataTypeImpl.proto(rowType), metrics,
           DruidTable.DEFAULT_TIMESTAMP_COLUMN, intervals, null, null);
       final TableScan scan = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
           optTable, viewTable.getTableName(), null, false, false);
-      rel = DruidQuery.create(cluster, cluster.traitSetOf(BindableConvention.INSTANCE),
+      tableRel = DruidQuery.create(cluster, cluster.traitSetOf(BindableConvention.INSTANCE),
           optTable, druidTable, ImmutableList.of(scan));
     } else {
       // Build Hive Table Scan Rel
-      rel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable,
+      RelOptHiveTable optTable = new RelOptHiveTable(null, fullyQualifiedTabName,
+          rowType, viewTable, nonPartitionColumns, partitionColumns, new ArrayList<>(),
+          conf, new HashMap<>(), new HashMap<>(), new AtomicInteger());
+      tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable,
           viewTable.getTableName(), null, false, false);
     }

-    return rel;
+    return tableRel;
   }

   private static RelNode parseQuery(HiveConf conf, String viewQuery) {
@@ -413,6 +425,7 @@ private static TableType obtainTableType(Table tabMetaData) {
     return TableType.NATIVE;
   }

+  //@TODO this seems to be the same as org.apache.hadoop.hive.ql.parse.CalcitePlanner.TableType.DRUID; do we really need both?
   private enum TableType {
     DRUID,
     NATIVE
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java
index 96fa2df..ce720a8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java
@@ -16,22 +16,30 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite.rules;

+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;

+import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptPredicateList;
 import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rel.rules.ReduceExpressionsRule;
+import org.apache.calcite.rex.RexBuilder;
 import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexExecutor;
 import org.apache.calcite.rex.RexInputRef;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexShuttle;
+import org.apache.calcite.rex.RexSimplify;
 import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.type.SqlTypeName;
 import org.apache.calcite.tools.RelBuilder;
 import org.apache.calcite.tools.RelBuilderFactory;
+import org.apache.calcite.util.Util;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 8e0a454..3520d90 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1427,7 +1427,6 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
       this.relOptSchema = relOptSchema;

       PerfLogger perfLogger = SessionState.getPerfLogger();
-
       // 1. Gen Calcite Plan
       perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
       try {
@@ -1477,7 +1476,6 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
         calciteGenPlan = HiveRelDecorrelator.decorrelateQuery(calciteGenPlan);
         LOG.debug("Plan after decorrelation:\n" + RelOptUtil.toString(calciteGenPlan));
-
       // 2. Apply pre-join order optimizations
       calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan,
           mdProvider.getMetadataProvider(), executorProvider);
@@ -1841,7 +1839,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
         rules.toArray(new RelOptRule[rules.size()]));
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
       "Calcite: Prejoin ordering transformation, PPD, not null predicates, transitive inference, constant folding");
-
+    // NOTE: this is already happening at line 1762
     // 4. Push down limit through outer join
     // NOTE: We run this after PPD to support old style join syntax.
     // Ex: select * from R1 left outer join R2 where ((R1.x=R2.x) and R1.y<10) or
@@ -2463,18 +2461,23 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc
       else {
         fullyQualifiedTabName = tabMetaData.getTableName();
       }
-      RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, fullyQualifiedTabName,
-          rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf,
-          partitionCache, colStatsCache, noColsMissingStats);
+      // Build Druid query
       String address = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
       String dataSource = tabMetaData.getParameters().get(Constants.DRUID_DATA_SOURCE);
       Set<String> metrics = new HashSet<>();
+      RexBuilder rexBuilder = cluster.getRexBuilder();
+      RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
       List<RelDataType> druidColTypes = new ArrayList<>();
       List<String> druidColNames = new ArrayList<>();
       for (RelDataTypeField field : rowType.getFieldList()) {
-        druidColTypes.add(field.getType());
+        if (DruidTable.DEFAULT_TIMESTAMP_COLUMN.equals(field.getName())) {
+          // Druid's time column is always not null.
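+          // As in HiveMaterializedViewsRegistry, the NOT NULL type enables the count(__time) -> count(*) rewrite.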
+          druidColTypes.add(dtFactory.createTypeWithNullability(field.getType(), false));
+        } else {
+          druidColTypes.add(field.getType());
+        }
         druidColNames.add(field.getName());
         if (field.getName().equals(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
           // timestamp
@@ -2488,10 +2491,13 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc
       }
       List<Interval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
-
+      rowType = dtFactory.createStructType(druidColTypes, druidColNames);
       DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false),
           dataSource, RelDataTypeImpl.proto(rowType), metrics,
           DruidTable.DEFAULT_TIMESTAMP_COLUMN, intervals, null, null);
+      RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, fullyQualifiedTabName,
+          rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf,
+          partitionCache, colStatsCache, noColsMissingStats);
       final TableScan scan = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
           optTable, null == tableAlias ? tabMetaData.getTableName() : tableAlias,
           getAliasId(tableAlias, qb), HiveConf.getBoolVar(conf,
@@ -3457,7 +3463,7 @@ private RelNode genOBLogicalPlan(QB qb, Pair<RelNode, RelNode> selPair,
       ASTNode ref = (ASTNode) nullObASTExpr.getChild(0);
       Map<ASTNode, ExprNodeDesc> astToExprNDescMap = null;
       ExprNodeDesc obExprNDesc = null;
-
+
       boolean isBothByPos = HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS);
       boolean isObyByPos = isBothByPos ||
           HiveConf.getBoolVar(conf, ConfVars.HIVE_ORDERBY_POSITION_ALIAS);
diff --git ql/src/test/queries/clientpositive/druid_timeseries.q ql/src/test/queries/clientpositive/druid_timeseries.q
index f784f26..bdcd148 100644
--- ql/src/test/queries/clientpositive/druid_timeseries.q
+++ ql/src/test/queries/clientpositive/druid_timeseries.q
@@ -4,9 +4,13 @@ CREATE EXTERNAL TABLE druid_table_1
 STORED BY 'org.apache.hadoop.hive.druid.QTestDruidStorageHandler'
 TBLPROPERTIES ("druid.datasource" = "wikipedia");

-DESCRIBE FORMATTED druid_table_1;
+-- DESCRIBE FORMATTED druid_table_1;

 -- GRANULARITY: ALL
+EXPLAIN SELECT count(`__time`) from druid_table_1 where `__time` >= '2010-01-01 00:00:00 UTC' AND `__time` <= '2012-03-01 00:00:00 UTC' OR added <= 0;
+
+EXPLAIN SELECT count(`__time`) from druid_table_1 where `__time` <= '2010-01-01 00:00:00 UTC';
+
 EXPLAIN
 SELECT max(added), sum(variation)
 FROM druid_table_1;
@@ -92,3 +96,14 @@ FROM
 ) subq
 WHERE subq.h BETWEEN CAST('2010-01-01 00:00:00' AS TIMESTAMP WITH LOCAL TIME ZONE)
         AND CAST('2014-01-01 00:00:00' AS TIMESTAMP WITH LOCAL TIME ZONE);
+
+-- Simplification of count(__time) as count(*) since time column is not null
+EXPLAIN SELECT count(`__time`) from druid_table_1;
+
+
+EXPLAIN SELECT count(`__time`) from druid_table_1 where `__time` <= '2010-01-01 00:00:00 UTC';
+
+EXPLAIN SELECT count(`__time`) from druid_table_1 where `__time` >= '2010-01-01 00:00:00';
+
+
+EXPLAIN SELECT count(`__time`) from druid_table_1 where `__time` <= '2010-01-01 00:00:00' OR `__time` <= '2012-03-01 00:00:00';
diff --git ql/src/test/results/clientpositive/druid_intervals.q.out ql/src/test/results/clientpositive/druid_intervals.q.out
index c94cbe9..89587b0 100644
--- ql/src/test/results/clientpositive/druid_intervals.q.out
+++ ql/src/test/results/clientpositive/druid_intervals.q.out
@@ -16,49 +16,49 @@ PREHOOK: Input: default@druid_table_1
 POSTHOOK: query: DESCRIBE FORMATTED druid_table_1
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@druid_table_1
-# col_name             data_type                       comment
-__time                 timestamp with local time zone  from deserializer
-robot                  string                          from deserializer
-namespace              string                          from deserializer
-anonymous              string                          from deserializer
-unpatrolled            string                          from deserializer
-page                   string                          from deserializer
-language               string                          from deserializer
-newpage                string                          from deserializer
-user                   string                          from deserializer
-count                  float                           from deserializer
-added                  float                           from deserializer
-delta                  float                           from deserializer
-variation              float                           from deserializer
-deleted                float                           from deserializer
-
-# Detailed Table Information
-Database:              default
+# col_name             data_type                       comment
+__time                 timestamp with local time zone  from deserializer
+robot                  string                          from deserializer
+namespace              string                          from deserializer
+anonymous              string                          from deserializer
+unpatrolled            string                          from deserializer
+page                   string                          from deserializer
+language               string                          from deserializer
+newpage                string                          from deserializer
+user                   string                          from deserializer
+count                  float                           from deserializer
+added                  float                           from deserializer
+delta                  float                           from deserializer
+variation              float                           from deserializer
+deleted                float                           from deserializer
+
+# Detailed Table Information
+Database:              default
 #### A masked pattern was here ####
-Retention:             0
+Retention:             0
 #### A masked pattern was here ####
-Table Type:            EXTERNAL_TABLE
-Table Parameters:
+Table Type:            EXTERNAL_TABLE
+Table Parameters:
 	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"__time\":\"true\",\"added\":\"true\",\"anonymous\":\"true\",\"count\":\"true\",\"deleted\":\"true\",\"delta\":\"true\",\"language\":\"true\",\"namespace\":\"true\",\"newpage\":\"true\",\"page\":\"true\",\"robot\":\"true\",\"unpatrolled\":\"true\",\"user\":\"true\",\"variation\":\"true\"}}
-	EXTERNAL             TRUE
-	druid.datasource     wikipedia
-	numFiles             0
-	numRows              0
-	rawDataSize          0
+	EXTERNAL             TRUE
+	druid.datasource     wikipedia
+	numFiles             0
+	numRows              0
+	rawDataSize          0
 	storage_handler      org.apache.hadoop.hive.druid.QTestDruidStorageHandler
-	totalSize            0
+	totalSize            0
 #### A masked pattern was here ####
-
-# Storage Information
-SerDe Library:         org.apache.hadoop.hive.druid.QTestDruidSerDe
-InputFormat:           null
-OutputFormat:          null
-Compressed:            No
-Num Buckets:           -1
-Bucket Columns:        []
-Sort Columns:          []
-Storage Desc Params:
-	serialization.format 1
+
+# Storage Information
+SerDe Library:         org.apache.hadoop.hive.druid.QTestDruidSerDe
+InputFormat:           null
+OutputFormat:          null
+Compressed:            No
+Num Buckets:           -1
+Bucket Columns:        []
+Sort Columns:          []
+Storage Desc Params:
+	serialization.format 1
 PREHOOK: query: EXPLAIN
 SELECT `__time`
 FROM druid_table_1
diff --git ql/src/test/results/clientpositive/druid_timeseries.q.out ql/src/test/results/clientpositive/druid_timeseries.q.out
index 264d2d1..a84584c 100644
--- ql/src/test/results/clientpositive/druid_timeseries.q.out
+++ ql/src/test/results/clientpositive/druid_timeseries.q.out
@@ -10,55 +10,83 @@ TBLPROPERTIES ("druid.datasource" = "wikipedia")
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@druid_table_1
-PREHOOK: query: DESCRIBE FORMATTED druid_table_1
-PREHOOK: type: DESCTABLE
-PREHOOK: Input: default@druid_table_1
-POSTHOOK: query: DESCRIBE FORMATTED druid_table_1
-POSTHOOK: type: DESCTABLE
-POSTHOOK: Input: default@druid_table_1
-# col_name             data_type                       comment
-__time                 timestamp with local time zone  from deserializer
-robot                  string                          from deserializer
-namespace              string                          from deserializer
-anonymous              string                          from deserializer
-unpatrolled            string                          from deserializer
-page                   string                          from deserializer
-language               string                          from deserializer
-newpage                string                          from deserializer
-user                   string                          from deserializer
-count                  float                           from deserializer
-added                  float                           from deserializer
-delta                  float                           from deserializer
-variation              float                           from deserializer
-deleted                float                           from deserializer
-
-# Detailed Table Information
-Database:              default
-#### A masked pattern was here ####
-Retention:             0
-#### A masked pattern was here ####
-Table Type:            EXTERNAL_TABLE
-Table Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"__time\":\"true\",\"added\":\"true\",\"anonymous\":\"true\",\"count\":\"true\",\"deleted\":\"true\",\"delta\":\"true\",\"language\":\"true\",\"namespace\":\"true\",\"newpage\":\"true\",\"page\":\"true\",\"robot\":\"true\",\"unpatrolled\":\"true\",\"user\":\"true\",\"variation\":\"true\"}}
-	EXTERNAL             TRUE
-	druid.datasource     wikipedia
-	numFiles             0
-	numRows              0
-	rawDataSize          0
-	storage_handler      org.apache.hadoop.hive.druid.QTestDruidStorageHandler
-	totalSize            0
-#### A masked pattern was here ####
-
-# Storage Information
-SerDe Library:         org.apache.hadoop.hive.druid.QTestDruidSerDe
-InputFormat:           null
-OutputFormat:          null
-Compressed:            No
-Num Buckets:           -1
-Bucket Columns:        []
-Sort Columns:          []
-Storage Desc Params:
-	serialization.format 1
+PREHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1 where `__time` >= '2010-01-01 00:00:00 UTC' AND `__time` <= '2012-03-01 00:00:00 UTC' OR added <= 0
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1 where `__time` >= '2010-01-01 00:00:00 UTC' AND `__time` <= '2012-03-01 00:00:00 UTC' OR added <= 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: druid_table_1
+            filterExpr: (((__time >= 2009-12-31 16:00:00.0 US/Pacific) and (__time <= 2012-02-29 16:00:00.0 US/Pacific)) or (added <= 0)) (type: boolean)
+            properties:
+              druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}}
+              druid.query.type select
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (((__time >= 2009-12-31 16:00:00.0 US/Pacific) and (__time <= 2012-02-29 16:00:00.0 US/Pacific)) or (added <= 0)) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order:
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1 where `__time` <= '2010-01-01 00:00:00 UTC'
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1 where `__time` <= '2010-01-01 00:00:00 UTC'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: druid_table_1
+          properties:
+            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000/2010-01-01T00:00:00.001"],"context":{"skipEmptyBuckets":true}}
+            druid.query.type timeseries
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Select Operator
+            expressions: $f0 (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            ListSink
+
 PREHOOK: query: EXPLAIN
 SELECT max(added), sum(variation)
 FROM druid_table_1
@@ -537,3 +565,128 @@ STAGE PLANS:
       Processor Tree:
         ListSink

+PREHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: druid_table_1
+          properties:
+            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}}
+            druid.query.type timeseries
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Select Operator
+            expressions: $f0 (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            ListSink
+
+PREHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1 where `__time` <= '2010-01-01 00:00:00 UTC'
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1 where `__time` <= '2010-01-01 00:00:00 UTC'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: druid_table_1
+          properties:
+            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000/2010-01-01T00:00:00.001"],"context":{"skipEmptyBuckets":true}}
+            druid.query.type timeseries
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Select Operator
+            expressions: $f0 (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            ListSink
+
+PREHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1 where `__time` >= '2010-01-01 00:00:00'
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1 where `__time` >= '2010-01-01 00:00:00'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: druid_table_1
+          properties:
+            druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["2010-01-01T08:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}}
{"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["2010-01-01T08:00:00.000/3000-01-01T00:00:00.000"],"context":{"skipEmptyBuckets":true}} + druid.query.type timeseries + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: $f0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + ListSink + +PREHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1 where `__time` <= '2010-01-01 00:00:00' OR `__time` <= '2012-03-01 00:00:00' +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT count(`__time`) from druid_table_1 where `__time` <= '2010-01-01 00:00:00' OR `__time` <= '2012-03-01 00:00:00' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: druid_table_1 + filterExpr: ((__time <= 2010-01-01 00:00:00.0 US/Pacific) or (__time <= 2012-03-01 00:00:00.0 US/Pacific)) (type: boolean) + properties: + druid.query.json {"queryType":"select","dataSource":"wikipedia","descending":false,"intervals":["1900-01-01T00:00:00.000/3000-01-01T00:00:00.000"],"dimensions":["robot","namespace","anonymous","unpatrolled","page","language","newpage","user"],"metrics":["count","added","delta","variation","deleted"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} + druid.query.type select + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((__time <= 2010-01-01 00:00:00.0 US/Pacific) or (__time <= 2012-03-01 00:00:00.0 US/Pacific)) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +