diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 2aa90437449eff13824880a0170b94c2cf9737b9..d2410ace0152e63d47716e5ebaa961966d669d2b 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -516,6 +516,7 @@ minillaplocal.query.files=\ correlationoptimizer2.q,\ correlationoptimizer4.q,\ correlationoptimizer6.q,\ + csv_llap.q,\ default_constraint.q,\ disable_merge_for_bucketing.q,\ cross_prod_1.q,\ diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java index c73ba2c6e9e4cf234017b60086b3a11fdf3a2f36..93dd57c2f418aec4222c19346284ec84e2eab744 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java @@ -769,6 +769,10 @@ public void cacheFileData(StripeData sd) { // Note that we cache each slice separately. We could cache them together at the end, but // then we won't be able to pass them to users without inc-refing explicitly. ColumnEncoding[] encodings = sd.getEncodings(); + // Force creation of cache data entry for root (struct) column if not present. + if (encodings[0] != null && sd.getData()[0] == null) { + createArrayToCache(sd, 0, null); + } for (int i = 0; i < encodings.length; ++i) { // Make data consistent with encodings, don't store useless information. if (sd.getData()[i] == null) { diff --git a/ql/src/test/queries/clientpositive/csv_llap.q b/ql/src/test/queries/clientpositive/csv_llap.q new file mode 100644 index 0000000000000000000000000000000000000000..a39f34b90eac2deea28713c309ee8434a2061a61 --- /dev/null +++ b/ql/src/test/queries/clientpositive/csv_llap.q @@ -0,0 +1,17 @@ +CREATE EXTERNAL TABLE csv_llap_test (ts int, id string, b1 boolean, b2 boolean, b3 boolean, b4 boolean) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.OpenCSVSerde' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'; + +LOAD DATA LOCAL INPATH '../../data/files/small_csv.csv' INTO TABLE csv_llap_test; +--location '../../data/files/small_csv.csv'; + +set hive.tez.exec.print.summary=true; +set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter; + +SELECT MIN(ts) FROM csv_llap_test; + +SELECT MIN(ts) FROM csv_llap_test; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/llap/csv_llap.q.out b/ql/src/test/results/clientpositive/llap/csv_llap.q.out new file mode 100644 index 0000000000000000000000000000000000000000..6f721495e9b7405be8f92f92e402f90c3ef390a2 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/csv_llap.q.out @@ -0,0 +1,88 @@ +PREHOOK: query: CREATE EXTERNAL TABLE csv_llap_test (ts int, id string, b1 boolean, b2 boolean, b3 boolean, b4 boolean) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.OpenCSVSerde' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@csv_llap_test +POSTHOOK: query: CREATE EXTERNAL TABLE csv_llap_test (ts int, id string, b1 boolean, b2 boolean, b3 boolean, b4 boolean) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.OpenCSVSerde' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@csv_llap_test +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/small_csv.csv' INTO TABLE csv_llap_test +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@csv_llap_test +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/small_csv.csv' INTO TABLE csv_llap_test +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@csv_llap_test +PREHOOK: query: SELECT MIN(ts) FROM csv_llap_test +PREHOOK: type: QUERY +PREHOOK: Input: default@csv_llap_test +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 32 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 + RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 + RECORDS_OUT_OPERATOR_FS_11: 1 + RECORDS_OUT_OPERATOR_GBY_10: 1 + RECORDS_OUT_OPERATOR_GBY_8: 1 + RECORDS_OUT_OPERATOR_MAP_0: 0 + RECORDS_OUT_OPERATOR_RS_9: 1 + RECORDS_OUT_OPERATOR_SEL_7: 32 + RECORDS_OUT_OPERATOR_TS_0: 32 +Stage-1 LLAP IO COUNTERS: + CACHE_MISS_BYTES: 2280 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 32 +Stage-1 INPUT COUNTERS: + GROUPED_INPUT_SPLITS_Map_1: 1 + INPUT_DIRECTORIES_Map_1: 1 + INPUT_FILES_Map_1: 1 + RAW_INPUT_SPLITS_Map_1: 1 +00117 +PREHOOK: query: SELECT MIN(ts) FROM csv_llap_test +PREHOOK: type: QUERY +PREHOOK: Input: default@csv_llap_test +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 32 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 + RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 + RECORDS_OUT_OPERATOR_FS_11: 1 + RECORDS_OUT_OPERATOR_GBY_10: 1 + RECORDS_OUT_OPERATOR_GBY_8: 1 + RECORDS_OUT_OPERATOR_MAP_0: 0 + RECORDS_OUT_OPERATOR_RS_9: 1 + RECORDS_OUT_OPERATOR_SEL_7: 32 + RECORDS_OUT_OPERATOR_TS_0: 32 +Stage-1 LLAP IO COUNTERS: + CACHE_HIT_BYTES: 2280 + NUM_DECODED_BATCHES: 1 + NUM_VECTOR_BATCHES: 1 + ROWS_EMITTED: 32 +Stage-1 INPUT COUNTERS: + GROUPED_INPUT_SPLITS_Map_1: 1 + INPUT_DIRECTORIES_Map_1: 1 + INPUT_FILES_Map_1: 1 + RAW_INPUT_SPLITS_Map_1: 1 +00117