diff --git itests/util/src/main/java/org/apache/hadoop/hive/cli/control/AbstractCliConfig.java itests/util/src/main/java/org/apache/hadoop/hive/cli/control/AbstractCliConfig.java index d38810f..0b12928 100644 --- itests/util/src/main/java/org/apache/hadoop/hive/cli/control/AbstractCliConfig.java +++ itests/util/src/main/java/org/apache/hadoop/hive/cli/control/AbstractCliConfig.java @@ -43,9 +43,9 @@ public static final String HIVE_ROOT = HiveTestEnvSetup.HIVE_ROOT; - public static enum MetastoreType { + enum MetastoreType { sql - }; + } private MetastoreType metastoreType = MetastoreType.sql; private String queryFile; diff --git itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java index 566d02b..7034c38 100644 --- itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java +++ itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java @@ -177,7 +177,7 @@ public MiniDruidCliConfig() { setInitScript("q_test_druid_init.sql"); setCleanupScript("q_test_cleanup_druid.sql"); - setHiveConfDir(""); + setHiveConfDir("data/conf/llap"); setClusterType(MiniClusterType.druid); setMetastoreType(MetastoreType.sql); setFsType(QTestUtil.FsType.hdfs); diff --git itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java index 6cd7a13..45602a2 100644 --- itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java +++ itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java @@ -456,8 +456,7 @@ private void createRemoteDirs() { private enum CoreClusterType { MR, TEZ, - SPARK, - DRUID + SPARK } public enum FsType { @@ -476,7 +475,7 @@ private void createRemoteDirs() { llap(CoreClusterType.TEZ, FsType.hdfs), llap_local(CoreClusterType.TEZ, FsType.local), none(CoreClusterType.MR, FsType.local), - druid(CoreClusterType.DRUID, FsType.hdfs); + druid(CoreClusterType.TEZ, FsType.hdfs); private final CoreClusterType coreClusterType; @@ -648,6 +647,27 @@ private void setupMiniCluster(HadoopShims shims, String confDir) throws String uriString = fs.getUri().toString(); + if (clusterType == MiniClusterType.druid) { + final String tempDir = System.getProperty("test.tmp.dir"); + druidCluster = new MiniDruidCluster("mini-druid", + getLogDirectory(), + tempDir, + setup.zkPort, + Utilities.jarFinderGetJar(MiniDruidCluster.class) + ); + final Path druidDeepStorage = fs.makeQualified(new Path(druidCluster.getDeepStorageDir())); + fs.mkdirs(druidDeepStorage); + conf.set("hive.druid.storage.storageDirectory", druidDeepStorage.toUri().getPath()); + conf.set("hive.druid.metadata.db.type", "derby"); + conf.set("hive.druid.metadata.uri", druidCluster.getMetadataURI()); + final Path scratchDir = fs + .makeQualified(new Path(System.getProperty("test.tmp.dir"), "druidStagingDir")); + fs.mkdirs(scratchDir); + conf.set("hive.druid.working.directory", scratchDir.toUri().getPath()); + druidCluster.init(conf); + druidCluster.start(); + } + if (clusterType.getCoreClusterType() == CoreClusterType.TEZ) { if (confDir != null && !confDir.isEmpty()) { conf.addResource(new URL("file://" + new File(confDir).toURI().getPath() @@ -668,18 +688,6 @@ private void setupMiniCluster(HadoopShims shims, String confDir) throws mr = shims.getMiniSparkCluster(conf, 2, uriString, 1); } else if (clusterType == MiniClusterType.mr) { mr = shims.getMiniMrCluster(conf, 2, uriString, 1); - } else if (clusterType == MiniClusterType.druid) { - final String tempDir = System.getProperty("test.tmp.dir"); - druidCluster = new MiniDruidCluster("mini-druid", - getLogDirectory(), - tempDir, - setup.zkPort, - Utilities.jarFinderGetJar(MiniDruidCluster.class) - ); - druidCluster.init(conf); - final Path druidDeepStorage = fs.makeQualified(new Path(druidCluster.getDeepStorageDir())); - fs.mkdirs(druidDeepStorage); - druidCluster.start(); } } diff --git ql/src/test/results/clientpositive/druid/druid_timestamptz.q.out ql/src/test/results/clientpositive/druid/druid_timestamptz.q.out index 0ffaaf4..f2b5e8d 100644 --- ql/src/test/results/clientpositive/druid/druid_timestamptz.q.out +++ ql/src/test/results/clientpositive/druid/druid_timestamptz.q.out @@ -27,54 +27,54 @@ POSTHOOK: Output: default@tstz1 PREHOOK: query: select `__time` from tstz1 PREHOOK: type: QUERY PREHOOK: Input: default@tstz1 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select `__time` from tstz1 POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz1 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 2016-01-03 12:26:34.0 US/Pacific PREHOOK: query: select cast(`__time` as timestamp) from tstz1 PREHOOK: type: QUERY PREHOOK: Input: default@tstz1 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select cast(`__time` as timestamp) from tstz1 POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz1 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 2016-01-03 12:26:34 PREHOOK: query: select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) PREHOOK: type: QUERY PREHOOK: Input: default@tstz1 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz1 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 2016-01-03 12:26:34 PREHOOK: query: select `__time` from tstz1 PREHOOK: type: QUERY PREHOOK: Input: default@tstz1 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select `__time` from tstz1 POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz1 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 2016-01-03 20:26:34.0 UTC PREHOOK: query: select cast(`__time` as timestamp) from tstz1 PREHOOK: type: QUERY PREHOOK: Input: default@tstz1 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select cast(`__time` as timestamp) from tstz1 POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz1 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 2016-01-03 20:26:34 PREHOOK: query: select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) PREHOOK: type: QUERY PREHOOK: Input: default@tstz1 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select cast(`__time` as timestamp) from tstz1 where `__time` >= cast('2016-01-03 12:26:34 America/Los_Angeles' as timestamp with local time zone) POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz1 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 2016-01-03 20:26:34 diff --git ql/src/test/results/clientpositive/druid/druidmini_dynamic_partition.q.out ql/src/test/results/clientpositive/druid/druidmini_dynamic_partition.q.out index 941b760..f82ecff 100644 --- ql/src/test/results/clientpositive/druid/druidmini_dynamic_partition.q.out +++ ql/src/test/results/clientpositive/druid/druidmini_dynamic_partition.q.out @@ -101,56 +101,61 @@ POSTHOOK: query: EXPLAIN CREATE TABLE druid_partitioned_table POSTHOOK: type: CREATETABLE_AS_SELECT STAGE DEPENDENCIES: Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 Stage-0 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-0 - Stage-2 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ctimestamp1 is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ctimestamp1 is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CAST( ctimestamp1 AS timestamp with local time zone) (type: timestamp with local time zone), cstring1 (type: string), cstring2 (type: string), cdouble (type: double), cfloat (type: float), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: timestamp with local time zone), _col1 (type: string), _col2 (type: string), _col3 (type: double), _col4 (type: float), _col5 (type: tinyint), _col6 (type: smallint), _col7 (type: int), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: boolean), floor_hour(CAST( GenericUDFEpochMilli(_col0) AS TIMESTAMP)) (type: timestamp), (floor((1.0 / rand())) % 6) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, __time_granularity, __druid_extra_partition_key + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: __time_granularity (type: timestamp), __druid_extra_partition_key (type: bigint) + sort order: ++ + Map-reduce partition columns: __time_granularity (type: timestamp), __druid_extra_partition_key (type: bigint) + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: timestamp with local time zone), _col1 (type: string), _col2 (type: string), _col3 (type: double), _col4 (type: float), _col5 (type: tinyint), _col6 (type: smallint), _col7 (type: int), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: boolean) + Reducer 2 + Reduce Operator Tree: Select Operator - expressions: CAST( ctimestamp1 AS timestamp with local time zone) (type: timestamp with local time zone), cstring1 (type: string), cstring2 (type: string), cdouble (type: double), cfloat (type: float), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: timestamp with local time zone), _col1 (type: string), _col2 (type: string), _col3 (type: double), _col4 (type: float), _col5 (type: tinyint), _col6 (type: smallint), _col7 (type: int), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: boolean), floor_hour(CAST( GenericUDFEpochMilli(_col0) AS TIMESTAMP)) (type: timestamp), (floor((1.0 / rand())) % 6) (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, __time_granularity, __druid_extra_partition_key - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: __time_granularity (type: timestamp), __druid_extra_partition_key (type: bigint) - sort order: ++ - Map-reduce partition columns: __time_granularity (type: timestamp), __druid_extra_partition_key (type: bigint) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: timestamp with local time zone), _col1 (type: string), _col2 (type: string), _col3 (type: double), _col4 (type: float), _col5 (type: tinyint), _col6 (type: smallint), _col7 (type: int), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: boolean) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: timestamp with local time zone), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: tinyint), VALUE._col6 (type: smallint), VALUE._col7 (type: int), VALUE._col8 (type: bigint), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), KEY.__time_granularity (type: timestamp), KEY.__druid_extra_partition_key (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, __time_granularity, __druid_extra_partition_key - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat - output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat - serde: org.apache.hadoop.hive.druid.serde.DruidSerDe - name: default.druid_partitioned_table + expressions: VALUE._col0 (type: timestamp with local time zone), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: tinyint), VALUE._col6 (type: smallint), VALUE._col7 (type: int), VALUE._col8 (type: bigint), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), KEY.__time_granularity (type: timestamp), KEY.__druid_extra_partition_key (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, __time_granularity, __druid_extra_partition_key + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat + output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat + serde: org.apache.hadoop.hive.druid.serde.DruidSerDe + name: default.druid_partitioned_table - Stage: Stage-0 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Stage: Stage-2 + Dependency Collection - Stage: Stage-3 + Stage: Stage-4 Create Table Operator: Create Table columns: __time timestamp with local time zone, cstring1 string, cstring2 string, cdouble double, cfloat float, ctinyint tinyint, csmallint smallint, cint int, cbigint bigint, cboolean1 boolean, cboolean2 boolean @@ -161,10 +166,16 @@ STAGE PLANS: druid.segment.granularity HOUR druid.segment.targetShardsPerGranularity 6 - Stage: Stage-2 + Stage: Stage-3 Stats Work Basic Stats Work: + Stage: Stage-0 + Move Operator + files: + hdfs directory: true + destination: hdfs://### HDFS PATH ### + PREHOOK: query: CREATE TABLE druid_partitioned_table STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' TBLPROPERTIES ( @@ -227,11 +238,11 @@ POSTHOOK: Lineage: druid_partitioned_table.ctinyint SIMPLE [(alltypesorc)alltype PREHOOK: query: SELECT sum(cfloat) FROM druid_partitioned_table PREHOOK: type: QUERY PREHOOK: Input: default@druid_partitioned_table -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT sum(cfloat) FROM druid_partitioned_table POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_partitioned_table -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### -39590.246 PREHOOK: query: SELECT floor_hour(cast(`ctimestamp1` as timestamp with local time zone)) as `__time`, cstring1, @@ -247,7 +258,7 @@ PREHOOK: query: SELECT floor_hour(cast(`ctimestamp1` as timestamp with local tim FROM alltypesorc where ctimestamp1 IS NOT NULL order by `__time`, cstring2 DESC NULLS LAST, cstring1 DESC NULLS LAST LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT floor_hour(cast(`ctimestamp1` as timestamp with local time zone)) as `__time`, cstring1, cstring2, @@ -262,7 +273,7 @@ POSTHOOK: query: SELECT floor_hour(cast(`ctimestamp1` as timestamp with local ti FROM alltypesorc where ctimestamp1 IS NOT NULL order by `__time`, cstring2 DESC NULLS LAST, cstring1 DESC NULLS LAST LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 1969-12-31 15:00:00.0 US/Pacific NULL yx36UAT823Cm -200.0 52.0 52 -200 NULL 2029007949 NULL true 1969-12-31 15:00:00.0 US/Pacific NULL yvcx4HYTT8tvAm6CNbXHaH -7196.0 40.0 40 -7196 NULL 437984126 NULL false 1969-12-31 15:00:00.0 US/Pacific NULL ysho54gMb 15601.0 -22.0 -22 15601 NULL 1553802956 NULL false @@ -325,42 +336,49 @@ STAGE PLANS: Pre-Insert task Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ctimestamp2 is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ctimestamp2 is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CAST( ctimestamp2 AS timestamp with local time zone) (type: timestamp with local time zone), cstring1 (type: string), cstring2 (type: string), cdouble (type: double), cfloat (type: float), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: timestamp with local time zone), _col1 (type: string), _col2 (type: string), _col3 (type: double), _col4 (type: float), _col5 (type: tinyint), _col6 (type: smallint), _col7 (type: int), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: boolean), floor_hour(CAST( GenericUDFEpochMilli(_col0) AS TIMESTAMP)) (type: timestamp), (floor((1.0 / rand())) % 6) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, __time_granularity, __druid_extra_partition_key + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: __time_granularity (type: timestamp), __druid_extra_partition_key (type: bigint) + sort order: ++ + Map-reduce partition columns: __time_granularity (type: timestamp), __druid_extra_partition_key (type: bigint) + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: timestamp with local time zone), _col1 (type: string), _col2 (type: string), _col3 (type: double), _col4 (type: float), _col5 (type: tinyint), _col6 (type: smallint), _col7 (type: int), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: boolean) + Reducer 2 + Reduce Operator Tree: Select Operator - expressions: CAST( ctimestamp2 AS timestamp with local time zone) (type: timestamp with local time zone), cstring1 (type: string), cstring2 (type: string), cdouble (type: double), cfloat (type: float), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: timestamp with local time zone), _col1 (type: string), _col2 (type: string), _col3 (type: double), _col4 (type: float), _col5 (type: tinyint), _col6 (type: smallint), _col7 (type: int), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: boolean), floor_hour(CAST( GenericUDFEpochMilli(_col0) AS TIMESTAMP)) (type: timestamp), (floor((1.0 / rand())) % 6) (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, __time_granularity, __druid_extra_partition_key - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: __time_granularity (type: timestamp), __druid_extra_partition_key (type: bigint) - sort order: ++ - Map-reduce partition columns: __time_granularity (type: timestamp), __druid_extra_partition_key (type: bigint) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: timestamp with local time zone), _col1 (type: string), _col2 (type: string), _col3 (type: double), _col4 (type: float), _col5 (type: tinyint), _col6 (type: smallint), _col7 (type: int), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: boolean) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: timestamp with local time zone), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: tinyint), VALUE._col6 (type: smallint), VALUE._col7 (type: int), VALUE._col8 (type: bigint), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), KEY.__time_granularity (type: timestamp), KEY.__druid_extra_partition_key (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, __time_granularity, __druid_extra_partition_key - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat - output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat - serde: org.apache.hadoop.hive.druid.serde.DruidSerDe - name: default.druid_partitioned_table + expressions: VALUE._col0 (type: timestamp with local time zone), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: tinyint), VALUE._col6 (type: smallint), VALUE._col7 (type: int), VALUE._col8 (type: bigint), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), KEY.__time_granularity (type: timestamp), KEY.__druid_extra_partition_key (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, __time_granularity, __druid_extra_partition_key + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat + output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat + serde: org.apache.hadoop.hive.druid.serde.DruidSerDe + name: default.druid_partitioned_table PREHOOK: query: INSERT INTO TABLE druid_partitioned_table SELECT cast (`ctimestamp2` as timestamp with local time zone) as `__time`, @@ -397,11 +415,11 @@ POSTHOOK: Output: default@druid_partitioned_table PREHOOK: query: SELECT sum(cfloat) FROM druid_partitioned_table PREHOOK: type: QUERY PREHOOK: Input: default@druid_partitioned_table -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT sum(cfloat) FROM druid_partitioned_table POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_partitioned_table -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### -46301.883 PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE druid_partitioned_table SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`, @@ -455,42 +473,49 @@ STAGE PLANS: Pre-Insert task Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ctimestamp1 is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ctimestamp1 is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CAST( ctimestamp1 AS timestamp with local time zone) (type: timestamp with local time zone), cstring1 (type: string), cstring2 (type: string), cdouble (type: double), cfloat (type: float), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: timestamp with local time zone), _col1 (type: string), _col2 (type: string), _col3 (type: double), _col4 (type: float), _col5 (type: tinyint), _col6 (type: smallint), _col7 (type: int), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: boolean), floor_hour(CAST( GenericUDFEpochMilli(_col0) AS TIMESTAMP)) (type: timestamp), (floor((1.0 / rand())) % 6) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, __time_granularity, __druid_extra_partition_key + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: __time_granularity (type: timestamp), __druid_extra_partition_key (type: bigint) + sort order: ++ + Map-reduce partition columns: __time_granularity (type: timestamp), __druid_extra_partition_key (type: bigint) + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: timestamp with local time zone), _col1 (type: string), _col2 (type: string), _col3 (type: double), _col4 (type: float), _col5 (type: tinyint), _col6 (type: smallint), _col7 (type: int), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: boolean) + Reducer 2 + Reduce Operator Tree: Select Operator - expressions: CAST( ctimestamp1 AS timestamp with local time zone) (type: timestamp with local time zone), cstring1 (type: string), cstring2 (type: string), cdouble (type: double), cfloat (type: float), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: timestamp with local time zone), _col1 (type: string), _col2 (type: string), _col3 (type: double), _col4 (type: float), _col5 (type: tinyint), _col6 (type: smallint), _col7 (type: int), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: boolean), floor_hour(CAST( GenericUDFEpochMilli(_col0) AS TIMESTAMP)) (type: timestamp), (floor((1.0 / rand())) % 6) (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, __time_granularity, __druid_extra_partition_key - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: __time_granularity (type: timestamp), __druid_extra_partition_key (type: bigint) - sort order: ++ - Map-reduce partition columns: __time_granularity (type: timestamp), __druid_extra_partition_key (type: bigint) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: timestamp with local time zone), _col1 (type: string), _col2 (type: string), _col3 (type: double), _col4 (type: float), _col5 (type: tinyint), _col6 (type: smallint), _col7 (type: int), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: boolean) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: timestamp with local time zone), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: tinyint), VALUE._col6 (type: smallint), VALUE._col7 (type: int), VALUE._col8 (type: bigint), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), KEY.__time_granularity (type: timestamp), KEY.__druid_extra_partition_key (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, __time_granularity, __druid_extra_partition_key - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat - output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat - serde: org.apache.hadoop.hive.druid.serde.DruidSerDe - name: default.druid_partitioned_table + expressions: VALUE._col0 (type: timestamp with local time zone), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: tinyint), VALUE._col6 (type: smallint), VALUE._col7 (type: int), VALUE._col8 (type: bigint), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), KEY.__time_granularity (type: timestamp), KEY.__druid_extra_partition_key (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, __time_granularity, __druid_extra_partition_key + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat + output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat + serde: org.apache.hadoop.hive.druid.serde.DruidSerDe + name: default.druid_partitioned_table PREHOOK: query: INSERT OVERWRITE TABLE druid_partitioned_table SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`, @@ -527,11 +552,11 @@ POSTHOOK: Output: default@druid_partitioned_table PREHOOK: query: SELECT sum(cfloat) FROM druid_partitioned_table PREHOOK: type: QUERY PREHOOK: Input: default@druid_partitioned_table -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT sum(cfloat) FROM druid_partitioned_table POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_partitioned_table -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### -39590.246 PREHOOK: query: CREATE TABLE druid_max_size_partition STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' @@ -593,11 +618,11 @@ POSTHOOK: Lineage: druid_max_size_partition.ctinyint SIMPLE [(alltypesorc)alltyp PREHOOK: query: SELECT sum(cfloat) FROM druid_max_size_partition PREHOOK: type: QUERY PREHOOK: Input: default@druid_max_size_partition -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT sum(cfloat) FROM druid_max_size_partition POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_max_size_partition -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### -39590.246 PREHOOK: query: DROP TABLE druid_partitioned_table_0 PREHOOK: type: DROPTABLE diff --git ql/src/test/results/clientpositive/druid/druidmini_mv.q.out ql/src/test/results/clientpositive/druid/druidmini_mv.q.out index efd6c59..f75a773 100644 --- ql/src/test/results/clientpositive/druid/druidmini_mv.q.out +++ ql/src/test/results/clientpositive/druid/druidmini_mv.q.out @@ -70,11 +70,11 @@ POSTHOOK: Output: default@cmv_mat_view PREHOOK: query: SELECT a, b, c FROM cmv_mat_view PREHOOK: type: QUERY PREHOOK: Input: default@cmv_mat_view -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT a, b, c FROM cmv_mat_view POSTHOOK: type: QUERY POSTHOOK: Input: default@cmv_mat_view -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 2 bob 3.140000104904175 2 bonnie 172342.203125 PREHOOK: query: SHOW TBLPROPERTIES cmv_mat_view @@ -115,11 +115,11 @@ POSTHOOK: Output: default@cmv_mat_view2 PREHOOK: query: SELECT a, c FROM cmv_mat_view2 PREHOOK: type: QUERY PREHOOK: Input: default@cmv_mat_view2 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT a, c FROM cmv_mat_view2 POSTHOOK: type: QUERY POSTHOOK: Input: default@cmv_mat_view2 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 3 978.760009765625 6 25.600000381469727 PREHOOK: query: SHOW TBLPROPERTIES cmv_mat_view2 @@ -146,53 +146,38 @@ FROM cmv_basetable WHERE a = 3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: cmv_basetable - Statistics: Num rows: 6 Data size: 10770 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a = 3) (type: boolean) - Statistics: Num rows: 3 Data size: 5385 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 3 (type: int), c (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 5385 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 5385 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: cmv_basetable + Filter Operator + predicate: (a = 3) (type: boolean) + Select Operator + expressions: 3 (type: int), c (type: double) + outputColumnNames: _col0, _col1 + ListSink PREHOOK: query: SELECT a, c FROM cmv_basetable WHERE a = 3 PREHOOK: type: QUERY PREHOOK: Input: default@cmv_basetable -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT a, c FROM cmv_basetable WHERE a = 3 POSTHOOK: type: QUERY POSTHOOK: Input: default@cmv_basetable -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 3 15.8 3 9.8 3 978.76 -Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM ( (SELECT a, c FROM cmv_basetable WHERE a = 3) table1 @@ -213,56 +198,65 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: cmv_basetable - Statistics: Num rows: 6 Data size: 10770 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a = 3) (type: boolean) - Statistics: Num rows: 3 Data size: 5385 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 5385 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 3 Data size: 5385 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) - TableScan - alias: cmv_basetable - Statistics: Num rows: 6 Data size: 10770 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((3 = a) and (d = 3)) (type: boolean) - Statistics: Num rows: 1 Data size: 1795 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 1795 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 1795 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 10773 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 3 (type: int), _col0 (type: double), 3 (type: int), _col1 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 10773 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 10773 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: cmv_basetable + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (a = 3) (type: boolean) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Map 3 + Map Operator Tree: + TableScan + alias: cmv_basetable + Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((3 = a) and (d = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 1044 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 3 (type: int), _col0 (type: double), 3 (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 36 Data size: 1044 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 1044 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -270,7 +264,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT * FROM ( (SELECT a, c FROM cmv_basetable WHERE a = 3) table1 JOIN @@ -278,7 +272,7 @@ PREHOOK: query: SELECT * FROM ( ON table1.a = table2.a) PREHOOK: type: QUERY PREHOOK: Input: default@cmv_basetable -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT * FROM ( (SELECT a, c FROM cmv_basetable WHERE a = 3) table1 JOIN @@ -286,7 +280,7 @@ POSTHOOK: query: SELECT * FROM ( ON table1.a = table2.a) POSTHOOK: type: QUERY POSTHOOK: Input: default@cmv_basetable -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 3 15.8 3 978.76 3 9.8 3 978.76 3 978.76 3 978.76 @@ -305,7 +299,7 @@ POSTHOOK: Lineage: cmv_basetable.b SCRIPT [] POSTHOOK: Lineage: cmv_basetable.c SCRIPT [] POSTHOOK: Lineage: cmv_basetable.d SCRIPT [] POSTHOOK: Lineage: cmv_basetable.t SCRIPT [] -Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM ( (SELECT a, c FROM cmv_basetable WHERE a = 3) table1 @@ -326,56 +320,65 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: cmv_basetable - Statistics: Num rows: 6 Data size: 20240 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a = 3) (type: boolean) - Statistics: Num rows: 3 Data size: 10120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 10120 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 3 Data size: 10120 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) - TableScan - alias: cmv_basetable - Statistics: Num rows: 6 Data size: 20240 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((3 = a) and (d = 3)) (type: boolean) - Statistics: Num rows: 1 Data size: 3373 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3373 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 3373 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 20242 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 3 (type: int), _col0 (type: double), 3 (type: int), _col1 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 20242 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 20242 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: cmv_basetable + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (a = 3) (type: boolean) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Map 3 + Map Operator Tree: + TableScan + alias: cmv_basetable + Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((3 = a) and (d = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 1044 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 3 (type: int), _col0 (type: double), 3 (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 36 Data size: 1044 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 1044 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -383,7 +386,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT * FROM ( (SELECT a, c FROM cmv_basetable WHERE a = 3) table1 JOIN @@ -391,7 +394,7 @@ PREHOOK: query: SELECT * FROM ( ON table1.a = table2.a) PREHOOK: type: QUERY PREHOOK: Input: default@cmv_basetable -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT * FROM ( (SELECT a, c FROM cmv_basetable WHERE a = 3) table1 JOIN @@ -399,7 +402,7 @@ POSTHOOK: query: SELECT * FROM ( ON table1.a = table2.a) POSTHOOK: type: QUERY POSTHOOK: Input: default@cmv_basetable -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 3 15.8 3 978.76 3 15.8 3 978.76 3 9.8 3 978.76 @@ -413,7 +416,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 - Stage-4 depends on stages: Stage-2, Stage-1, Stage-3 + Stage-5 depends on stages: Stage-2, Stage-1, Stage-3 Stage-1 is a root stage Stage-3 is a root stage @@ -430,7 +433,7 @@ STAGE PLANS: Insert operator: Insert - Stage: Stage-4 + Stage: Stage-5 Materialized View Work Stage: Stage-1 @@ -438,42 +441,49 @@ STAGE PLANS: Pre-Insert task Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: cmv_basetable - Statistics: Num rows: 6 Data size: 20240 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a = 3) (type: boolean) - Statistics: Num rows: 3 Data size: 10120 Basic stats: COMPLETE Column stats: NONE + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: cmv_basetable + Statistics: Num rows: 6 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (a = 3) (type: boolean) + Statistics: Num rows: 6 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CAST( t AS timestamp with local time zone) (type: timestamp with local time zone), 3 (type: int), b (type: varchar(256)), c (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: timestamp with local time zone), _col1 (type: int), _col2 (type: varchar(256)), _col3 (type: double), floor_hour(CAST( GenericUDFEpochMilli(_col0) AS TIMESTAMP)) (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, __time_granularity + Statistics: Num rows: 6 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: __time_granularity (type: timestamp) + sort order: + + Map-reduce partition columns: __time_granularity (type: timestamp) + Statistics: Num rows: 6 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: timestamp with local time zone), _col1 (type: int), _col2 (type: varchar(256)), _col3 (type: double) + Reducer 2 + Reduce Operator Tree: Select Operator - expressions: CAST( t AS timestamp with local time zone) (type: timestamp with local time zone), 3 (type: int), b (type: varchar(256)), c (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 10120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: timestamp with local time zone), _col1 (type: int), _col2 (type: varchar(256)), _col3 (type: double), floor_hour(CAST( GenericUDFEpochMilli(_col0) AS TIMESTAMP)) (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, __time_granularity - Statistics: Num rows: 3 Data size: 10120 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: __time_granularity (type: timestamp) - sort order: + - Map-reduce partition columns: __time_granularity (type: timestamp) - Statistics: Num rows: 3 Data size: 10120 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: timestamp with local time zone), _col1 (type: int), _col2 (type: varchar(256)), _col3 (type: double) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: timestamp with local time zone), VALUE._col1 (type: int), VALUE._col2 (type: varchar(256)), VALUE._col3 (type: double), KEY.__time_granularity (type: timestamp) - outputColumnNames: _col0, _col1, _col2, _col3, __time_granularity - Statistics: Num rows: 3 Data size: 10120 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 3 Data size: 10120 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat - output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat - serde: org.apache.hadoop.hive.druid.serde.DruidSerDe - name: default.cmv_mat_view2 + expressions: VALUE._col0 (type: timestamp with local time zone), VALUE._col1 (type: int), VALUE._col2 (type: varchar(256)), VALUE._col3 (type: double), KEY.__time_granularity (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3, __time_granularity + Statistics: Num rows: 6 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 6 Data size: 2352 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat + output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat + serde: org.apache.hadoop.hive.druid.serde.DruidSerDe + name: default.cmv_mat_view2 PREHOOK: query: ALTER MATERIALIZED VIEW cmv_mat_view2 REBUILD PREHOOK: type: QUERY @@ -496,7 +506,7 @@ rawDataSize 0 storage_handler org.apache.hadoop.hive.druid.DruidStorageHandler totalSize 0 #### A masked pattern was here #### -Warning: Shuffle Join JOIN[6][tables = [cmv_mat_view2, $hdt$_0]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM ( (SELECT a, c FROM cmv_basetable WHERE a = 3) table1 @@ -517,52 +527,65 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: cmv_basetable - Statistics: Num rows: 6 Data size: 20240 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((3 = a) and (d = 3)) (type: boolean) - Statistics: Num rows: 1 Data size: 3373 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3373 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 3373 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) - TableScan - alias: cmv_mat_view2 - properties: - druid.query.json {"queryType":"select","dataSource":"default.cmv_mat_view2","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":[],"metrics":["c"],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select - Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: c (type: double) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1, _col5 - Statistics: Num rows: 3 Data size: 10122 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: 3 (type: int), _col1 (type: double), 3 (type: int), _col5 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 10122 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 10122 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: cmv_basetable + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (a = 3) (type: boolean) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Map 3 + Map Operator Tree: + TableScan + alias: cmv_basetable + Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((3 = a) and (d = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 1044 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 3 (type: int), _col0 (type: double), 3 (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 36 Data size: 1044 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 1044 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -570,7 +593,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[6][tables = [cmv_mat_view2, $hdt$_0]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT * FROM ( (SELECT a, c FROM cmv_basetable WHERE a = 3) table1 JOIN @@ -578,8 +601,7 @@ PREHOOK: query: SELECT * FROM ( ON table1.a = table2.a) PREHOOK: type: QUERY PREHOOK: Input: default@cmv_basetable -PREHOOK: Input: default@cmv_mat_view2 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT * FROM ( (SELECT a, c FROM cmv_basetable WHERE a = 3) table1 JOIN @@ -587,11 +609,11 @@ POSTHOOK: query: SELECT * FROM ( ON table1.a = table2.a) POSTHOOK: type: QUERY POSTHOOK: Input: default@cmv_basetable -POSTHOOK: Input: default@cmv_mat_view2 -#### A masked pattern was here #### -3 15.800000190734863 3 978.76 -3 25.600000381469727 3 978.76 -3 978.760009765625 3 978.76 +POSTHOOK: Output: hdfs://### HDFS PATH ### +3 15.8 3 978.76 +3 15.8 3 978.76 +3 9.8 3 978.76 +3 978.76 3 978.76 PREHOOK: query: DROP MATERIALIZED VIEW cmv_mat_view PREHOOK: type: DROP_MATERIALIZED_VIEW PREHOOK: Input: default@cmv_mat_view diff --git ql/src/test/results/clientpositive/druid/druidmini_test1.q.out ql/src/test/results/clientpositive/druid/druidmini_test1.q.out index 2cbd5fb..aa68f48 100644 --- ql/src/test/results/clientpositive/druid/druidmini_test1.q.out +++ ql/src/test/results/clientpositive/druid/druidmini_test1.q.out @@ -66,21 +66,19 @@ STAGE PLANS: properties: druid.query.json {"queryType":"timeseries","dataSource":"default.druid_table","descending":false,"granularity":"all","aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: $f0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink PREHOOK: query: SELECT count(*) FROM druid_table PREHOOK: type: QUERY PREHOOK: Input: default@druid_table -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT count(*) FROM druid_table POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 6105 PREHOOK: query: EXPLAIN SELECT floor_year(`__time`), SUM(cfloat), SUM(cdouble), SUM(ctinyint), SUM(csmallint),SUM(cint), SUM(cbigint) FROM druid_table GROUP BY floor_year(`__time`) @@ -101,23 +99,21 @@ STAGE PLANS: properties: druid.query.json {"queryType":"timeseries","dataSource":"default.druid_table","descending":false,"granularity":"year","aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"cfloat"},{"type":"doubleSum","name":"$f2","fieldName":"cdouble"},{"type":"longSum","name":"$f3","fieldName":"ctinyint"},{"type":"longSum","name":"$f4","fieldName":"csmallint"},{"type":"longSum","name":"$f5","fieldName":"cint"},{"type":"longSum","name":"$f6","fieldName":"cbigint"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp with local time zone), $f1 (type: float), $f2 (type: float), $f3 (type: bigint), $f4 (type: bigint), $f5 (type: bigint), $f6 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink PREHOOK: query: SELECT floor_year(`__time`), SUM(cfloat), SUM(cdouble), SUM(ctinyint), SUM(csmallint),SUM(cint), SUM(cbigint) FROM druid_table GROUP BY floor_year(`__time`) PREHOOK: type: QUERY PREHOOK: Input: default@druid_table -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT floor_year(`__time`), SUM(cfloat), SUM(cdouble), SUM(ctinyint), SUM(csmallint),SUM(cint), SUM(cbigint) FROM druid_table GROUP BY floor_year(`__time`) POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 1968-12-31 16:00:00.0 US/Pacific -4532.57 3660538.8 -4611 3658030 688783835691 8060200254 1969-12-31 16:00:00.0 US/Pacific -35057.676 2.3648124E7 -35356 4123059 719285966109 2932345033 PREHOOK: query: EXPLAIN SELECT floor_year(`__time`), MIN(cfloat), MIN(cdouble), MIN(ctinyint), MIN(csmallint),MIN(cint), MIN(cbigint) @@ -139,23 +135,21 @@ STAGE PLANS: properties: druid.query.json {"queryType":"timeseries","dataSource":"default.druid_table","descending":false,"granularity":"year","aggregations":[{"type":"doubleMin","name":"$f1","fieldName":"cfloat"},{"type":"doubleMin","name":"$f2","fieldName":"cdouble"},{"type":"longMin","name":"$f3","fieldName":"ctinyint"},{"type":"longMin","name":"$f4","fieldName":"csmallint"},{"type":"longMin","name":"$f5","fieldName":"cint"},{"type":"longMin","name":"$f6","fieldName":"cbigint"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp with local time zone), $f1 (type: float), $f2 (type: float), $f3 (type: bigint), $f4 (type: bigint), $f5 (type: bigint), $f6 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink PREHOOK: query: SELECT floor_year(`__time`), MIN(cfloat), MIN(cdouble), MIN(ctinyint), MIN(csmallint),MIN(cint), MIN(cbigint) FROM druid_table GROUP BY floor_year(`__time`) PREHOOK: type: QUERY PREHOOK: Input: default@druid_table -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT floor_year(`__time`), MIN(cfloat), MIN(cdouble), MIN(ctinyint), MIN(csmallint),MIN(cint), MIN(cbigint) FROM druid_table GROUP BY floor_year(`__time`) POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 1968-12-31 16:00:00.0 US/Pacific -1790.778 -308691.84 -1790 -313425 0 -8577981133 1969-12-31 16:00:00.0 US/Pacific -964.719 -287404.84 -1051 -292138 -1073279343 -2147311592 PREHOOK: query: EXPLAIN SELECT floor_year(`__time`), MAX(cfloat), MAX(cdouble), MAX(ctinyint), MAX(csmallint),MAX(cint), MAX(cbigint) @@ -177,23 +171,21 @@ STAGE PLANS: properties: druid.query.json {"queryType":"timeseries","dataSource":"default.druid_table","descending":false,"granularity":"year","aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"cfloat"},{"type":"doubleMax","name":"$f2","fieldName":"cdouble"},{"type":"longMax","name":"$f3","fieldName":"ctinyint"},{"type":"longMax","name":"$f4","fieldName":"csmallint"},{"type":"longMax","name":"$f5","fieldName":"cint"},{"type":"longMax","name":"$f6","fieldName":"cbigint"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":true}} druid.query.type timeseries - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: __time (type: timestamp with local time zone), $f1 (type: float), $f2 (type: float), $f3 (type: bigint), $f4 (type: bigint), $f5 (type: bigint), $f6 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink PREHOOK: query: SELECT floor_year(`__time`), MAX(cfloat), MAX(cdouble), MAX(ctinyint), MAX(csmallint),MAX(cint), MAX(cbigint) FROM druid_table GROUP BY floor_year(`__time`) PREHOOK: type: QUERY PREHOOK: Input: default@druid_table -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT floor_year(`__time`), MAX(cfloat), MAX(cdouble), MAX(ctinyint), MAX(csmallint),MAX(cint), MAX(cbigint) FROM druid_table GROUP BY floor_year(`__time`) POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 1968-12-31 16:00:00.0 US/Pacific 62.0 57235.0 62 57235 314088763179 2144274348 1969-12-31 16:00:00.0 US/Pacific 769.164 1.9565518E7 723 57435 319104152611 4923772860 PREHOOK: query: EXPLAIN SELECT cstring1, SUM(cdouble) as s FROM druid_table GROUP BY cstring1 ORDER BY s ASC LIMIT 10 @@ -213,21 +205,19 @@ STAGE PLANS: properties: druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table","granularity":"all","dimensions":[{"type":"default","dimension":"cstring1"}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"$f1","direction":"ascending","dimensionOrder":"numeric"}]},"aggregations":[{"type":"doubleSum","name":"$f1","fieldName":"cdouble"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: cstring1 (type: string), $f1 (type: float) outputColumnNames: _col0, _col1 - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink PREHOOK: query: SELECT cstring1, SUM(cdouble) as s FROM druid_table GROUP BY cstring1 ORDER BY s ASC LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@druid_table -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT cstring1, SUM(cdouble) as s FROM druid_table GROUP BY cstring1 ORDER BY s ASC LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 1cGVWH7n1QU -596096.7 821UdmGbkEf4j -14161.827 00iT08 0.0 @@ -255,21 +245,19 @@ STAGE PLANS: properties: druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table","granularity":"all","dimensions":[{"type":"default","dimension":"cstring2"}],"limitSpec":{"type":"default","limit":10,"columns":[{"dimension":"cstring2","direction":"ascending","dimensionOrder":"alphanumeric"}]},"aggregations":[{"type":"doubleMax","name":"$f1","fieldName":"cdouble"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"]} druid.query.type groupBy - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: cstring2 (type: string), $f1 (type: float) outputColumnNames: _col0, _col1 - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink PREHOOK: query: SELECT cstring2, MAX(cdouble) FROM druid_table GROUP BY cstring2 ORDER BY cstring2 ASC LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@druid_table -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT cstring2, MAX(cdouble) FROM druid_table GROUP BY cstring2 ORDER BY cstring2 ASC LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### NULL 1.9565518E7 0AAE3daA78MISbsRsHJrp2PI 0.0 0amu3m60U20Xa3 -200.0 @@ -294,38 +282,45 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: druid_table - properties: - druid.query.json {"queryType":"select","dataSource":"default.druid_table","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: __time (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: timestamp with local time zone) - sort order: + - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table + properties: + druid.query.json {"queryType":"select","dataSource":"default.druid_table","descending":false,"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} + druid.query.type select + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: __time (type: timestamp with local time zone) + outputColumnNames: _col0 + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp with local time zone) + sort order: + + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp with local time zone) + outputColumnNames: _col0 + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -337,12 +332,12 @@ PREHOOK: query: SELECT `__time` FROM druid_table ORDER BY `__time` ASC LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@druid_table -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT `__time` FROM druid_table ORDER BY `__time` ASC LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 1969-12-31 15:59:00.0 US/Pacific 1969-12-31 15:59:00.0 US/Pacific 1969-12-31 15:59:00.0 US/Pacific @@ -369,38 +364,45 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: druid_table - properties: - druid.query.json {"queryType":"select","dataSource":"default.druid_table","descending":false,"intervals":["1900-01-01T00:00:00.000Z/1970-03-01T08:00:00.000Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: __time (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: timestamp with local time zone) - sort order: + - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table + properties: + druid.query.json {"queryType":"select","dataSource":"default.druid_table","descending":false,"intervals":["1900-01-01T00:00:00.000Z/1970-03-01T08:00:00.000Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} + druid.query.type select + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: __time (type: timestamp with local time zone) + outputColumnNames: _col0 + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp with local time zone) + sort order: + + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp with local time zone) + outputColumnNames: _col0 + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -413,13 +415,13 @@ FROM druid_table WHERE `__time` < '1970-03-01 00:00:00' ORDER BY `__time` ASC LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@druid_table -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT `__time` FROM druid_table WHERE `__time` < '1970-03-01 00:00:00' ORDER BY `__time` ASC LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 1969-12-31 15:59:00.0 US/Pacific 1969-12-31 15:59:00.0 US/Pacific 1969-12-31 15:59:00.0 US/Pacific @@ -446,38 +448,45 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: druid_table - properties: - druid.query.json {"queryType":"select","dataSource":"default.druid_table","descending":false,"intervals":["1968-01-01T08:00:00.000Z/1970-03-01T08:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: __time (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: timestamp with local time zone) - sort order: + - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table + properties: + druid.query.json {"queryType":"select","dataSource":"default.druid_table","descending":false,"intervals":["1968-01-01T08:00:00.000Z/1970-03-01T08:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} + druid.query.type select + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: __time (type: timestamp with local time zone) + outputColumnNames: _col0 + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp with local time zone) + sort order: + + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp with local time zone) + outputColumnNames: _col0 + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -490,13 +499,13 @@ FROM druid_table WHERE `__time` >= '1968-01-01 00:00:00' AND `__time` <= '1970-03-01 00:00:00' ORDER BY `__time` ASC LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@druid_table -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT `__time` FROM druid_table WHERE `__time` >= '1968-01-01 00:00:00' AND `__time` <= '1970-03-01 00:00:00' ORDER BY `__time` ASC LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 1969-12-31 15:59:00.0 US/Pacific 1969-12-31 15:59:00.0 US/Pacific 1969-12-31 15:59:00.0 US/Pacific @@ -525,38 +534,45 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: druid_table - properties: - druid.query.json {"queryType":"select","dataSource":"default.druid_table","descending":false,"intervals":["1968-01-01T08:00:00.000Z/1970-03-01T08:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: __time (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: timestamp with local time zone) - sort order: + - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table + properties: + druid.query.json {"queryType":"select","dataSource":"default.druid_table","descending":false,"intervals":["1968-01-01T08:00:00.000Z/1970-03-01T08:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} + druid.query.type select + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: __time (type: timestamp with local time zone) + outputColumnNames: _col0 + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp with local time zone) + sort order: + + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp with local time zone) + outputColumnNames: _col0 + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -570,14 +586,14 @@ WHERE `__time` >= '1968-01-01 00:00:00' AND `__time` <= '1970-03-01 00:00:00' AND `__time` < '2011-01-01 00:00:00' ORDER BY `__time` ASC LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@druid_table -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT `__time` FROM druid_table WHERE `__time` >= '1968-01-01 00:00:00' AND `__time` <= '1970-03-01 00:00:00' AND `__time` < '2011-01-01 00:00:00' ORDER BY `__time` ASC LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 1969-12-31 15:59:00.0 US/Pacific 1969-12-31 15:59:00.0 US/Pacific 1969-12-31 15:59:00.0 US/Pacific @@ -604,38 +620,45 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: druid_table - properties: - druid.query.json {"queryType":"select","dataSource":"default.druid_table","descending":false,"intervals":["1968-01-01T08:00:00.000Z/1970-01-01T08:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: __time (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: timestamp with local time zone) - sort order: + - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table + properties: + druid.query.json {"queryType":"select","dataSource":"default.druid_table","descending":false,"intervals":["1968-01-01T08:00:00.000Z/1970-01-01T08:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} + druid.query.type select + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: __time (type: timestamp with local time zone) + outputColumnNames: _col0 + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp with local time zone) + sort order: + + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp with local time zone) + outputColumnNames: _col0 + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -648,13 +671,13 @@ FROM druid_table WHERE `__time` BETWEEN '1968-01-01 00:00:00' AND '1970-01-01 00:00:00' ORDER BY `__time` ASC LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@druid_table -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT `__time` FROM druid_table WHERE `__time` BETWEEN '1968-01-01 00:00:00' AND '1970-01-01 00:00:00' ORDER BY `__time` ASC LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 1969-12-31 15:59:00.0 US/Pacific 1969-12-31 15:59:00.0 US/Pacific 1969-12-31 15:59:00.0 US/Pacific @@ -683,38 +706,45 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: druid_table - properties: - druid.query.json {"queryType":"select","dataSource":"default.druid_table","descending":false,"intervals":["1968-01-01T08:00:00.000Z/1970-04-01T08:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} - druid.query.type select - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: __time (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: timestamp with local time zone) - sort order: + - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: timestamp with local time zone) - outputColumnNames: _col0 - Statistics: Num rows: 9173 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 0 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: druid_table + properties: + druid.query.json {"queryType":"select","dataSource":"default.druid_table","descending":false,"intervals":["1968-01-01T08:00:00.000Z/1970-04-01T08:00:00.001Z"],"dimensions":[],"metrics":[],"granularity":"all","pagingSpec":{"threshold":16384,"fromNext":true},"context":{"druid.query.fetch":false}} + druid.query.type select + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: __time (type: timestamp with local time zone) + outputColumnNames: _col0 + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp with local time zone) + sort order: + + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp with local time zone) + outputColumnNames: _col0 + Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 380 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -728,14 +758,14 @@ WHERE (`__time` BETWEEN '1968-01-01 00:00:00' AND '1970-01-01 00:00:00') OR (`__time` BETWEEN '1968-02-01 00:00:00' AND '1970-04-01 00:00:00') ORDER BY `__time` ASC LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@druid_table -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT `__time` FROM druid_table WHERE (`__time` BETWEEN '1968-01-01 00:00:00' AND '1970-01-01 00:00:00') OR (`__time` BETWEEN '1968-02-01 00:00:00' AND '1970-04-01 00:00:00') ORDER BY `__time` ASC LIMIT 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_table -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 1969-12-31 15:59:00.0 US/Pacific 1969-12-31 15:59:00.0 US/Pacific 1969-12-31 15:59:00.0 US/Pacific diff --git ql/src/test/results/clientpositive/druid/druidmini_test_insert.q.out ql/src/test/results/clientpositive/druid/druidmini_test_insert.q.out index 7e01b0d..8b79f6a 100644 --- ql/src/test/results/clientpositive/druid/druidmini_test_insert.q.out +++ ql/src/test/results/clientpositive/druid/druidmini_test_insert.q.out @@ -52,11 +52,11 @@ POSTHOOK: Lineage: druid_alltypesorc.ctinyint SIMPLE [(alltypesorc)alltypesorc.F PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@druid_alltypesorc -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_alltypesorc -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 6057 PREHOOK: query: INSERT INTO TABLE druid_alltypesorc SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`, @@ -93,11 +93,11 @@ POSTHOOK: Output: default@druid_alltypesorc PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@druid_alltypesorc -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_alltypesorc -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 12162 PREHOOK: query: INSERT OVERWRITE TABLE druid_alltypesorc SELECT cast (`ctimestamp1` as timestamp with local time zone) as `__time`, @@ -134,11 +134,11 @@ POSTHOOK: Output: default@druid_alltypesorc PREHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@druid_alltypesorc -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT COUNT(*) FROM druid_alltypesorc POSTHOOK: type: QUERY POSTHOOK: Input: default@druid_alltypesorc -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 6105 PREHOOK: query: DROP TABLE druid_alltypesorc PREHOOK: type: DROPTABLE