diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 42bc5df0a90053c2f794f4494ae1833df1938ab2..d714e6087701dcaa7d9812886defbbdb09d91a96 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -536,6 +536,7 @@ minillaplocal.query.files=\ correlationoptimizer2.q,\ correlationoptimizer4.q,\ correlationoptimizer6.q,\ + csv_llap.q,\ default_constraint.q,\ disable_merge_for_bucketing.q,\ cross_prod_1.q,\ diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java index c9e9c02f3b36dd8b35df097e64ea51d4996cdceb..f489dda894504b6d7c50261baa754944639a8d50 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java @@ -773,6 +773,10 @@ public void cacheFileData(StripeData sd) { // Note that we cache each slice separately. We could cache them together at the end, but // then we won't be able to pass them to users without inc-refing explicitly. ColumnEncoding[] encodings = sd.getEncodings(); + // Force creation of cache data entry for root (struct) column if not present. + if (encodings[0] != null && sd.getData()[0] == null) { + createArrayToCache(sd, 0, null); + } for (int i = 0; i < encodings.length; ++i) { // Make data consistent with encodings, don't store useless information. if (sd.getData()[i] == null) { diff --git a/ql/src/test/queries/clientpositive/csv_llap.q b/ql/src/test/queries/clientpositive/csv_llap.q new file mode 100644 index 0000000000000000000000000000000000000000..c262c92ce2210818a4c699c2c29757a3d4f0a8dd --- /dev/null +++ b/ql/src/test/queries/clientpositive/csv_llap.q @@ -0,0 +1,17 @@ +CREATE EXTERNAL TABLE csv_llap_test (ts int, id string, b1 boolean, b2 boolean, b3 boolean, b4 boolean) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.OpenCSVSerde' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'; + +LOAD DATA LOCAL INPATH '../../data/files/small_csv.csv' INTO TABLE csv_llap_test; +--location '../../data/files/small_csv.csv'; + +SELECT MIN(ts) FROM csv_llap_test; + +set hive.llap.io.cache.only=true; +--an exception would be thrown from here on for cache miss + +SELECT MIN(ts) FROM csv_llap_test; diff --git a/ql/src/test/results/clientpositive/llap/csv_llap.q.out b/ql/src/test/results/clientpositive/llap/csv_llap.q.out new file mode 100644 index 0000000000000000000000000000000000000000..1bf6d0986e5c2ed3d0d7f7d77587cc8840bd63f6 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/csv_llap.q.out @@ -0,0 +1,46 @@ +PREHOOK: query: CREATE EXTERNAL TABLE csv_llap_test (ts int, id string, b1 boolean, b2 boolean, b3 boolean, b4 boolean) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.OpenCSVSerde' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@csv_llap_test +POSTHOOK: query: CREATE EXTERNAL TABLE csv_llap_test (ts int, id string, b1 boolean, b2 boolean, b3 boolean, b4 boolean) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.OpenCSVSerde' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@csv_llap_test +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/small_csv.csv' INTO TABLE csv_llap_test +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@csv_llap_test +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/small_csv.csv' INTO TABLE csv_llap_test +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@csv_llap_test +PREHOOK: query: SELECT MIN(ts) FROM csv_llap_test +PREHOOK: type: QUERY +PREHOOK: Input: default@csv_llap_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT MIN(ts) FROM csv_llap_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@csv_llap_test +#### A masked pattern was here #### +00117 +PREHOOK: query: SELECT MIN(ts) FROM csv_llap_test +PREHOOK: type: QUERY +PREHOOK: Input: default@csv_llap_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT MIN(ts) FROM csv_llap_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@csv_llap_test +#### A masked pattern was here #### +00117 diff --git a/ql/src/test/results/clientpositive/llap/llap_io_etl.q.out b/ql/src/test/results/clientpositive/llap/llap_io_etl.q.out index 1a967fafef1d1cfb1cc72bb458845f68f7a7accf..a527469ca3fd5ed8490b8add9a0417b06e2623cb 100644 --- a/ql/src/test/results/clientpositive/llap/llap_io_etl.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_io_etl.q.out @@ -180,7 +180,7 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_OPERATOR_TS_0: 2 TOTAL_TABLE_ROWS_WRITTEN: 2 Stage-1 LLAP IO COUNTERS: - CACHE_MISS_BYTES: 244 + CACHE_HIT_BYTES: 244 NUM_DECODED_BATCHES: 1 NUM_VECTOR_BATCHES: 1 ROWS_EMITTED: 2