diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
index 2945f59..036f122 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -49,6 +49,9 @@
 import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
 import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
 import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -111,6 +114,12 @@
       .getName();
   protected static final String RCFILE_OUTPUT = RCFileOutputFormat.class
       .getName();
+  protected static final String ORCFILE_INPUT = OrcInputFormat.class
+      .getName();
+  protected static final String ORCFILE_OUTPUT = OrcOutputFormat.class
+      .getName();
+  protected static final String ORCFILE_SERDE = OrcSerde.class
+      .getName();
   protected static final String COLUMNAR_SERDE = ColumnarSerDe.class.getName();
 
   class RowFormatParams {
@@ -189,6 +198,14 @@ protected boolean fillStorageFormat(ASTNode child, AnalyzeCreateCommonVars share
         }
         storageFormat = true;
         break;
+      case HiveParser.TOK_TBLORCFILE:
+        inputFormat = ORCFILE_INPUT;
+        outputFormat = ORCFILE_OUTPUT;
+        if (shared.serde == null) {
+          shared.serde = ORCFILE_SERDE;
+        }
+        storageFormat = true;
+        break;
       case HiveParser.TOK_TABLEFILEFORMAT:
         inputFormat = unescapeSQLString(child.getChild(0).getText());
         outputFormat = unescapeSQLString(child.getChild(1).getText());
@@ -216,6 +233,10 @@ protected void fillDefaultStorageFormat(AnalyzeCreateCommonVars shared) {
       inputFormat = RCFILE_INPUT;
       outputFormat = RCFILE_OUTPUT;
       shared.serde = COLUMNAR_SERDE;
+    } else if ("ORC".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT))) {
+      inputFormat = ORCFILE_INPUT;
+      outputFormat = ORCFILE_OUTPUT;
+      shared.serde = ORCFILE_SERDE;
     } else {
       inputFormat = TEXTFILE_INPUT;
       outputFormat = TEXTFILE_OUTPUT;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
index 1218caf..ed31dbd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
@@ -1143,6 +1143,11 @@ private void analyzeAlterTableFileFormat(ASTNode ast, String tableName,
       outputFormat = RCFILE_OUTPUT;
       serde = COLUMNAR_SERDE;
       break;
+    case HiveParser.TOK_TBLORCFILE:
+      inputFormat = ORCFILE_INPUT;
+      outputFormat = ORCFILE_OUTPUT;
+      serde = ORCFILE_SERDE;
+      break;
     case HiveParser.TOK_FILEFORMAT_GENERIC:
       handleGenericFileFormat(child);
       break;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index 2271627..41d7601 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -132,6 +132,7 @@ KW_FILEFORMAT: 'FILEFORMAT';
 KW_SEQUENCEFILE: 'SEQUENCEFILE';
 KW_TEXTFILE: 'TEXTFILE';
 KW_RCFILE: 'RCFILE';
+KW_ORCFILE: 'ORC';
 KW_INPUTFORMAT: 'INPUTFORMAT';
 KW_OUTPUTFORMAT: 'OUTPUTFORMAT';
 KW_INPUTDRIVER: 'INPUTDRIVER';
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index d379875..f9cee4e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -175,6 +175,7 @@ TOK_TABLEROWFORMATFIELD;
 TOK_TABLEROWFORMATCOLLITEMS;
 TOK_TABLEROWFORMATMAPKEYS;
 TOK_TABLEROWFORMATLINES;
+TOK_TBLORCFILE;
 TOK_TBLSEQUENCEFILE;
 TOK_TBLTEXTFILE;
 TOK_TBLRCFILE;
@@ -1145,6 +1146,7 @@ fileFormat
     : KW_SEQUENCEFILE  -> ^(TOK_TBLSEQUENCEFILE)
     | KW_TEXTFILE  -> ^(TOK_TBLTEXTFILE)
     | KW_RCFILE  -> ^(TOK_TBLRCFILE)
+    | KW_ORCFILE  -> ^(TOK_TBLORCFILE)
     | KW_INPUTFORMAT inFmt=StringLiteral KW_OUTPUTFORMAT outFmt=StringLiteral (KW_INPUTDRIVER inDriver=StringLiteral KW_OUTPUTDRIVER outDriver=StringLiteral)?
       -> ^(TOK_TABLEFILEFORMAT $inFmt $outFmt $inDriver? $outDriver?)
     | genericSpec=identifier -> ^(TOK_FILEFORMAT_GENERIC $genericSpec)
@@ -1562,6 +1564,7 @@ tableFileFormat
       KW_STORED KW_AS KW_SEQUENCEFILE  -> TOK_TBLSEQUENCEFILE
     | KW_STORED KW_AS KW_TEXTFILE  -> TOK_TBLTEXTFILE
     | KW_STORED KW_AS KW_RCFILE  -> TOK_TBLRCFILE
+    | KW_STORED KW_AS KW_ORCFILE  -> TOK_TBLORCFILE
     | KW_STORED KW_AS KW_INPUTFORMAT inFmt=StringLiteral KW_OUTPUTFORMAT outFmt=StringLiteral (KW_INPUTDRIVER inDriver=StringLiteral KW_OUTPUTDRIVER outDriver=StringLiteral)?
       -> ^(TOK_TABLEFILEFORMAT $inFmt $outFmt $inDriver? $outDriver?)
     | KW_STORED KW_BY storageHandler=StringLiteral
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index e053ebf..4f12259 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -500,5 +500,5 @@ identifier
 
 nonReserved
     :
-    KW_TRUE | KW_FALSE | KW_ALL | KW_AND | KW_OR | KW_NOT | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_FROM | KW_AS | KW_DISTINCT | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_PRESERVE | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_COLUMN | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_SEQUENCEFILE | KW_TEXTFILE | KW_RCFILE | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_INNER
+    KW_TRUE | KW_FALSE | KW_ALL | KW_AND | KW_OR | KW_NOT | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_FROM | KW_AS | KW_DISTINCT | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_PRESERVE | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_COLUMN | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_SEQUENCEFILE | KW_TEXTFILE | KW_RCFILE | KW_ORCFILE | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_INNER
     ;
diff --git ql/src/test/queries/clientpositive/orc_create.q ql/src/test/queries/clientpositive/orc_create.q
new file mode 100644
index 0000000..8f9ae36
--- /dev/null
+++ ql/src/test/queries/clientpositive/orc_create.q
@@ -0,0 +1,9 @@
+DROP TABLE orc_create;
+
+CREATE TABLE orc_create (key INT, value STRING)
+  PARTITIONED BY (ds string)
+  STORED AS ORC;
+
+DESCRIBE FORMATTED orc_create;
+
+DROP TABLE orc_create;
diff --git ql/src/test/queries/clientpositive/orc_createas1.q ql/src/test/queries/clientpositive/orc_createas1.q
new file mode 100644
index 0000000..d8cd433
--- /dev/null
+++ ql/src/test/queries/clientpositive/orc_createas1.q
@@ -0,0 +1,48 @@
+set mapred.max.split.size=100;
+set mapred.min.split.size=1;
+
+DROP TABLE orc_createas1a;
+DROP TABLE orc_createas1b;
+DROP TABLE orc_createas1c;
+
+CREATE TABLE orc_createas1a (key INT, value STRING)
+  PARTITIONED BY (ds string);
+INSERT OVERWRITE TABLE orc_createas1a PARTITION (ds='1')
+  SELECT * FROM src;
+INSERT OVERWRITE TABLE orc_createas1a PARTITION (ds='2')
+  SELECT * FROM src;
+
+EXPLAIN CREATE TABLE orc_createas1b
+  STORED AS ORC AS
+  SELECT * FROM src;
+
+CREATE TABLE orc_createas1b
+  STORED AS ORC AS
+  SELECT * FROM src;
+
+EXPLAIN SELECT * FROM orc_createas1b LIMIT 5;
+
+SELECT * FROM orc_createas1b LIMIT 5;
+
+EXPLAIN
+  CREATE TABLE orc_createas1c
+  STORED AS ORC AS
+  SELECT key, value, PMOD(HASH(key), 50) as part
+  FROM orc_createas1a;
+CREATE TABLE orc_createas1c
+  STORED AS ORC AS
+  SELECT key, value, PMOD(HASH(key), 50) as part
+  FROM orc_createas1a;
+
+SELECT SUM(HASH(c)) FROM (
+  SELECT TRANSFORM(key, value) USING 'tr \t _' AS (c)
+  FROM orc_createas1a
+) t;
+SELECT SUM(HASH(c)) FROM (
+  SELECT TRANSFORM(key, value) USING 'tr \t _' AS (c)
+  FROM orc_createas1c
+) t;
+
+DROP TABLE orc_createas1a;
+DROP TABLE orc_createas1b;
+DROP TABLE orc_createas1c;
diff --git ql/src/test/results/clientpositive/orc_create.q.out ql/src/test/results/clientpositive/orc_create.q.out
new file mode 100644
index 0000000..ff3bb50
--- /dev/null
+++ ql/src/test/results/clientpositive/orc_create.q.out
@@ -0,0 +1,55 @@
+PREHOOK: query: DROP TABLE orc_create
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE orc_create
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE orc_create (key INT, value STRING)
+  PARTITIONED BY (ds string)
+  STORED AS ORC
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE orc_create (key INT, value STRING)
+  PARTITIONED BY (ds string)
+  STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@orc_create
+PREHOOK: query: DESCRIBE FORMATTED orc_create
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED orc_create
+POSTHOOK: type: DESCTABLE
+# col_name            data_type           comment
+
+key                   int                 from deserializer
+value                 string              from deserializer
+
+# Partition Information
+# col_name            data_type           comment
+
+ds                    string              None
+
+# Detailed Table Information
+Database:             default
+#### A masked pattern was here ####
+Protect Mode:         None
+Retention:            0
+#### A masked pattern was here ####
+Table Type:           MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library:        org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat:          org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat:         org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed:           No
+Num Buckets:          -1
+Bucket Columns:       []
+Sort Columns:         []
+Storage Desc Params:
+	serialization.format	1
+PREHOOK: query: DROP TABLE orc_create
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orc_create
+PREHOOK: Output: default@orc_create
+POSTHOOK: query: DROP TABLE orc_create
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orc_create
+POSTHOOK: Output: default@orc_create
diff --git ql/src/test/results/clientpositive/orc_createas1.q.out ql/src/test/results/clientpositive/orc_createas1.q.out
new file mode 100644
index 0000000..5bfbec2
--- /dev/null
+++ ql/src/test/results/clientpositive/orc_createas1.q.out
@@ -0,0 +1,432 @@
+PREHOOK: query: DROP TABLE orc_createas1a
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE orc_createas1a
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE orc_createas1b
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE orc_createas1b
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE orc_createas1c
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE orc_createas1c
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE orc_createas1a (key INT, value STRING)
+  PARTITIONED BY (ds string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE orc_createas1a (key INT, value STRING)
+  PARTITIONED BY (ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@orc_createas1a
+PREHOOK: query: INSERT OVERWRITE TABLE orc_createas1a PARTITION (ds='1')
+  SELECT * FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orc_createas1a@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE orc_createas1a PARTITION (ds='1')
+  SELECT * FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orc_createas1a@ds=1
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: INSERT OVERWRITE TABLE orc_createas1a PARTITION (ds='2')
+  SELECT * FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orc_createas1a@ds=2
+POSTHOOK: query: INSERT OVERWRITE TABLE orc_createas1a PARTITION (ds='2')
+  SELECT * FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orc_createas1a@ds=2
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: EXPLAIN CREATE TABLE orc_createas1b
+  STORED AS ORC AS
+  SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: query: EXPLAIN CREATE TABLE orc_createas1b
+  STORED AS ORC AS
+  SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_CREATETABLE (TOK_TABNAME orc_createas1b) TOK_LIKETABLE TOK_TBLORCFILE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+  Stage-4
+  Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+  Stage-8 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-8
+  Stage-3
+  Stage-5
+  Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                    name: default.orc_createas1b
+
+  Stage: Stage-7
+    Conditional Operator
+
+  Stage: Stage-4
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-8
+      Create Table Operator:
+        Create Table
+          columns: key string, value string
+          if not exists: false
+          input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          # buckets: -1
+          output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+          serde name: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+          name: orc_createas1b
+          isExternal: false
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                  name: default.orc_createas1b
+
+  Stage: Stage-5
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                  name: default.orc_createas1b
+
+  Stage: Stage-6
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+
+PREHOOK: query: CREATE TABLE orc_createas1b
+  STORED AS ORC AS
+  SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: CREATE TABLE orc_createas1b
+  STORED AS ORC AS
+  SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orc_createas1b
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: EXPLAIN SELECT * FROM orc_createas1b LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT * FROM orc_createas1b LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME orc_createas1b))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 5)))
+
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        TableScan
+          alias: orc_createas1b
+          Select Operator
+            expressions:
+                  expr: key
+                  type: string
+                  expr: value
+                  type: string
+            outputColumnNames: _col0, _col1
+            Limit
+              ListSink
+
+
+PREHOOK: query: SELECT * FROM orc_createas1b LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_createas1b
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM orc_createas1b LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_createas1b
+#### A masked pattern was here ####
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+238	val_238
+86	val_86
+311	val_311
+27	val_27
+165	val_165
+PREHOOK: query: EXPLAIN
+  CREATE TABLE orc_createas1c
+  STORED AS ORC AS
+  SELECT key, value, PMOD(HASH(key), 50) as part
+  FROM orc_createas1a
+PREHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: query: EXPLAIN
+  CREATE TABLE orc_createas1c
+  STORED AS ORC AS
+  SELECT key, value, PMOD(HASH(key), 50) as part
+  FROM orc_createas1a
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_CREATETABLE (TOK_TABNAME orc_createas1c) TOK_LIKETABLE TOK_TBLORCFILE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME orc_createas1a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION PMOD (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key)) 50) part)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+  Stage-4
+  Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+  Stage-8 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-8
+  Stage-3
+  Stage-5
+  Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        orc_createas1a
+          TableScan
+            alias: orc_createas1a
+            Select Operator
+              expressions:
+                    expr: key
+                    type: int
+                    expr: value
+                    type: string
+                    expr: pmod(hash(key), 50)
+                    type: int
+              outputColumnNames: _col0, _col1, _col2
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                    name: default.orc_createas1c
+
+  Stage: Stage-7
+    Conditional Operator
+
+  Stage: Stage-4
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-8
+      Create Table Operator:
+        Create Table
+          columns: key int, value string, part int
+          if not exists: false
+          input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          # buckets: -1
+          output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+          serde name: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+          name: orc_createas1c
+          isExternal: false
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                  name: default.orc_createas1c
+
+  Stage: Stage-5
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                  name: default.orc_createas1c
+
+  Stage: Stage-6
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+
+PREHOOK: query: CREATE TABLE orc_createas1c
+  STORED AS ORC AS
+  SELECT key, value, PMOD(HASH(key), 50) as part
+  FROM orc_createas1a
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@orc_createas1a
+PREHOOK: Input: default@orc_createas1a@ds=1
+PREHOOK: Input: default@orc_createas1a@ds=2
+POSTHOOK: query: CREATE TABLE orc_createas1c
+  STORED AS ORC AS
+  SELECT key, value, PMOD(HASH(key), 50) as part
+  FROM orc_createas1a
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@orc_createas1a
+POSTHOOK: Input: default@orc_createas1a@ds=1
+POSTHOOK: Input: default@orc_createas1a@ds=2
+POSTHOOK: Output: default@orc_createas1c
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT SUM(HASH(c)) FROM (
+  SELECT TRANSFORM(key, value) USING 'tr \t _' AS (c)
+  FROM orc_createas1a
+) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_createas1a
+PREHOOK: Input: default@orc_createas1a@ds=1
+PREHOOK: Input: default@orc_createas1a@ds=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
+  SELECT TRANSFORM(key, value) USING 'tr \t _' AS (c)
+  FROM orc_createas1a
+) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_createas1a
+POSTHOOK: Input: default@orc_createas1a@ds=1
+POSTHOOK: Input: default@orc_createas1a@ds=2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+14412220296
+PREHOOK: query: SELECT SUM(HASH(c)) FROM (
+  SELECT TRANSFORM(key, value) USING 'tr \t _' AS (c)
+  FROM orc_createas1c
+) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_createas1c
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(c)) FROM (
+  SELECT TRANSFORM(key, value) USING 'tr \t _' AS (c)
+  FROM orc_createas1c
+) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_createas1c
+#### A masked pattern was here ####
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+14412220296
+PREHOOK: query: DROP TABLE orc_createas1a
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orc_createas1a
+PREHOOK: Output: default@orc_createas1a
+POSTHOOK: query: DROP TABLE orc_createas1a
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orc_createas1a
+POSTHOOK: Output: default@orc_createas1a
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DROP TABLE orc_createas1b
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orc_createas1b
+PREHOOK: Output: default@orc_createas1b
+POSTHOOK: query: DROP TABLE orc_createas1b
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orc_createas1b
+POSTHOOK: Output: default@orc_createas1b
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DROP TABLE orc_createas1c
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orc_createas1c
+PREHOOK: Output: default@orc_createas1c
+POSTHOOK: query: DROP TABLE orc_createas1c
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orc_createas1c
+POSTHOOK: Output: default@orc_createas1c
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: orc_createas1a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
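
Usage note (not part of the patch): the new .q tests cover CREATE TABLE ... STORED AS ORC and CTAS, but two other code paths touched above are not exercised there — the TOK_TBLORCFILE case added to DDLSemanticAnalyzer.analyzeAlterTableFileFormat and the HIVEDEFAULTFILEFORMAT branch added to BaseSemanticAnalyzer.fillDefaultStorageFormat. A minimal HiveQL sketch of both follows; the table names are hypothetical and ALTER TABLE ... SET FILEFORMAT only rewrites the table's storage descriptor, it does not convert existing data files.

-- Hypothetical table t: switch its metadata to the ORC input/output formats and serde.
ALTER TABLE t SET FILEFORMAT ORC;

-- Make ORC the default for tables created without a STORED AS clause
-- (HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT, i.e. hive.default.fileformat).
SET hive.default.fileformat=ORC;
CREATE TABLE t_orc_default (key INT, value STRING);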