diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java new file mode 100644 index 0000000000000000000000000000000000000000..b0b4a3641b92041ed64a4755ba595cb69bceb765 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/PostExecOrcFileDump.java @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.hooks; + +import java.io.IOException; +import java.io.PrintStream; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.QueryPlan; +import org.apache.hadoop.hive.ql.exec.FetchTask; +import org.apache.hadoop.hive.ql.io.FileFormatException; +import org.apache.hadoop.hive.ql.io.orc.FileDump; +import org.apache.hadoop.hive.ql.io.orc.OrcFile; +import org.apache.hadoop.hive.ql.plan.FetchWork; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.shims.ShimLoader; + +import com.google.common.collect.Lists; + +/** + * Post exec hook to print orc file dump for files that will be read by fetch task. The file dump + * output will be printed before fetch task output. It also prints the row index for the 1st column + * in the file just to verify the impact of bloom filter fpp. + */ +public class PostExecOrcFileDump implements ExecuteWithHookContext { + private static final Log LOG = LogFactory.getLog(PostExecOrcFileDump.class.getName()); + + private static final PathFilter hiddenFileFilter = new PathFilter() { + public boolean accept(Path p) { + String name = p.getName(); + return !name.startsWith("_") && !name.startsWith("."); + } + }; + + @Override + public void run(HookContext hookContext) throws Exception { + assert (hookContext.getHookType() == HookContext.HookType.POST_EXEC_HOOK); + HiveConf conf = hookContext.getConf(); + + LOG.info("Executing post execution hook to print orc file dump.."); + QueryPlan plan = hookContext.getQueryPlan(); + if (plan == null) { + return; + } + + FetchTask fetchTask = plan.getFetchTask(); + if (fetchTask != null) { + SessionState ss = SessionState.get(); + SessionState.LogHelper console = ss.getConsole(); + + // file dump should write to session state console's error stream + PrintStream old = System.out; + System.setOut(console.getErrStream()); + + FetchWork fetchWork = fetchTask.getWork(); + boolean partitionedTable = fetchWork.isPartitioned(); + List directories; + if (partitionedTable) { + LOG.info("Printing orc file dump for files from partitioned directory.."); + directories = fetchWork.getPartDir(); + } else { + LOG.info("Printing orc file dump for files from table directory.."); + directories = Lists.newArrayList(); + directories.add(fetchWork.getTblDir()); + } + + for (Path dir : directories) { + FileSystem fs = dir.getFileSystem(conf); + List fileList = ShimLoader.getHadoopShims().listLocatedStatus(fs, dir, + hiddenFileFilter); + + for (FileStatus fileStatus : fileList) { + LOG.info("Printing orc file dump for " + fileStatus.getPath()); + if (fileStatus.getLen() > 0) { + try { + // just creating orc reader is going to do sanity checks to make sure its valid ORC file + OrcFile.createReader(fs, fileStatus.getPath()); + console.printError("-- BEGIN ORC FILE DUMP --"); + FileDump.main(new String[]{fileStatus.getPath().toString(), "--rowindex=1"}); + console.printError("-- END ORC FILE DUMP --"); + } catch (FileFormatException e) { + LOG.warn("File " + fileStatus.getPath() + " is not ORC. Skip printing orc file dump"); + } catch (IOException e) { + LOG.warn("Skip printing orc file dump. Exception: " + e.getMessage()); + } + } else { + LOG.warn("Zero length file encountered. Skip printing orc file dump."); + } + } + } + + // restore the old out stream + System.out.flush(); + System.setOut(old); + } + } + +} diff --git a/ql/src/test/queries/clientpositive/orc_file_dump.q b/ql/src/test/queries/clientpositive/orc_file_dump.q new file mode 100644 index 0000000000000000000000000000000000000000..ed0da75baf1fa1d23fd9cc9ff8ffbc09964b4183 --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_file_dump.q @@ -0,0 +1,57 @@ +CREATE TABLE staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging; + +CREATE TABLE orc_ppd(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); + +insert overwrite table orc_ppd select * from staging; + +SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecOrcFileDump; + +select * from orc_ppd limit 1; + +alter table orc_ppd set tblproperties("orc.bloom.filter.fpp"="0.01"); + +insert overwrite table orc_ppd select * from staging; + +select * from orc_ppd limit 1; + +CREATE TABLE orc_ppd_part(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +PARTITIONED BY (ds string, hr int) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*"); + +insert overwrite table orc_ppd_part partition(ds = "2015", hr = 10) select * from staging; + +select * from orc_ppd_part limit 1; diff --git a/ql/src/test/results/clientpositive/orc_file_dump.q.out b/ql/src/test/results/clientpositive/orc_file_dump.q.out new file mode 100644 index 0000000000000000000000000000000000000000..67aa189ed888df3cd0d7dfdfbeefa7936071f209 --- /dev/null +++ b/ql/src/test/results/clientpositive/orc_file_dump.q.out @@ -0,0 +1,447 @@ +PREHOOK: query: CREATE TABLE staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@staging +POSTHOOK: query: CREATE TABLE staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@staging +PREHOOK: query: CREATE TABLE orc_ppd(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_ppd +POSTHOOK: query: CREATE TABLE orc_ppd(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_ppd +PREHOOK: query: insert overwrite table orc_ppd select * from staging +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd +POSTHOOK: query: insert overwrite table orc_ppd select * from staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@orc_ppd +POSTHOOK: Lineage: orc_ppd.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: orc_ppd.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_ppd.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: orc_ppd.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: orc_ppd.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: orc_ppd.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_ppd.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +PREHOOK: query: select * from orc_ppd limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +-- BEGIN ORC FILE DUMP -- +#### A masked pattern was here #### +File Version: 0.12 with HIVE_8732 +Rows: 1049 +Compression: ZLIB +Compression size: 262144 +Type: struct<_col0:tinyint,_col1:smallint,_col2:int,_col3:bigint,_col4:float,_col5:double,_col6:boolean,_col7:string,_col8:timestamp,_col9:decimal(4,2),_col10:binary> + +Stripe Statistics: + Stripe 1: + Column 0: count: 1049 hasNull: false + Column 1: count: 1046 hasNull: true min: -3 max: 124 sum: 62430 + Column 2: count: 1046 hasNull: true min: 256 max: 511 sum: 398889 + Column 3: count: 1049 hasNull: false min: 65536 max: 65791 sum: 68881051 + Column 4: count: 1049 hasNull: false min: 4294967296 max: 4294967551 sum: 4505420825953 + Column 5: count: 1049 hasNull: false min: 0.07999999821186066 max: 99.91999816894531 sum: 52744.70002820343 + Column 6: count: 1049 hasNull: false min: 0.02 max: 49.85 sum: 26286.349999999977 + Column 7: count: 1049 hasNull: false true: 526 + Column 8: count: 1049 hasNull: false min: max: zach zipper sum: 13443 + Column 9: count: 1049 hasNull: false min: 2013-03-01 09:11:58.703 max: 2013-03-01 09:11:58.703 + Column 10: count: 1049 hasNull: false min: 0.08 max: 99.94 sum: 53646.16 + Column 11: count: 1049 hasNull: false sum: 13278 + +File Statistics: + Column 0: count: 1049 hasNull: false + Column 1: count: 1046 hasNull: true min: -3 max: 124 sum: 62430 + Column 2: count: 1046 hasNull: true min: 256 max: 511 sum: 398889 + Column 3: count: 1049 hasNull: false min: 65536 max: 65791 sum: 68881051 + Column 4: count: 1049 hasNull: false min: 4294967296 max: 4294967551 sum: 4505420825953 + Column 5: count: 1049 hasNull: false min: 0.07999999821186066 max: 99.91999816894531 sum: 52744.70002820343 + Column 6: count: 1049 hasNull: false min: 0.02 max: 49.85 sum: 26286.349999999977 + Column 7: count: 1049 hasNull: false true: 526 + Column 8: count: 1049 hasNull: false min: max: zach zipper sum: 13443 + Column 9: count: 1049 hasNull: false min: 2013-03-01 09:11:58.703 max: 2013-03-01 09:11:58.703 + Column 10: count: 1049 hasNull: false min: 0.08 max: 99.94 sum: 53646.16 + Column 11: count: 1049 hasNull: false sum: 13278 + +Stripes: + Stripe: offset: 3 data: 22636 rows: 1049 tail: 249 index: 9944 + Stream: column 0 section ROW_INDEX start: 3 length 20 + Stream: column 0 section BLOOM_FILTER start: 23 length 45 + Stream: column 1 section ROW_INDEX start: 68 length 58 + Stream: column 1 section BLOOM_FILTER start: 126 length 799 + Stream: column 2 section ROW_INDEX start: 925 length 58 + Stream: column 2 section BLOOM_FILTER start: 983 length 978 + Stream: column 3 section ROW_INDEX start: 1961 length 61 + Stream: column 3 section BLOOM_FILTER start: 2022 length 983 + Stream: column 4 section ROW_INDEX start: 3005 length 69 + Stream: column 4 section BLOOM_FILTER start: 3074 length 963 + Stream: column 5 section ROW_INDEX start: 4037 length 78 + Stream: column 5 section BLOOM_FILTER start: 4115 length 1291 + Stream: column 6 section ROW_INDEX start: 5406 length 85 + Stream: column 6 section BLOOM_FILTER start: 5491 length 1280 + Stream: column 7 section ROW_INDEX start: 6771 length 41 + Stream: column 7 section BLOOM_FILTER start: 6812 length 45 + Stream: column 8 section ROW_INDEX start: 6857 length 86 + Stream: column 8 section BLOOM_FILTER start: 6943 length 1157 + Stream: column 9 section ROW_INDEX start: 8100 length 51 + Stream: column 9 section BLOOM_FILTER start: 8151 length 62 + Stream: column 10 section ROW_INDEX start: 8213 length 82 + Stream: column 10 section BLOOM_FILTER start: 8295 length 1297 + Stream: column 11 section ROW_INDEX start: 9592 length 47 + Stream: column 11 section BLOOM_FILTER start: 9639 length 308 + Stream: column 1 section PRESENT start: 9947 length 17 + Stream: column 1 section DATA start: 9964 length 962 + Stream: column 2 section PRESENT start: 10926 length 17 + Stream: column 2 section DATA start: 10943 length 1441 + Stream: column 3 section DATA start: 12384 length 1704 + Stream: column 4 section DATA start: 14088 length 1998 + Stream: column 5 section DATA start: 16086 length 2925 + Stream: column 6 section DATA start: 19011 length 3323 + Stream: column 7 section DATA start: 22334 length 137 + Stream: column 8 section DATA start: 22471 length 1572 + Stream: column 8 section LENGTH start: 24043 length 310 + Stream: column 8 section DICTIONARY_DATA start: 24353 length 1548 + Stream: column 9 section DATA start: 25901 length 62 + Stream: column 9 section SECONDARY start: 25963 length 1783 + Stream: column 10 section DATA start: 27746 length 2138 + Stream: column 10 section SECONDARY start: 29884 length 231 + Stream: column 11 section DATA start: 30115 length 1877 + Stream: column 11 section LENGTH start: 31992 length 591 + Encoding column 0: DIRECT + Encoding column 1: DIRECT + Encoding column 2: DIRECT_V2 + Encoding column 3: DIRECT_V2 + Encoding column 4: DIRECT_V2 + Encoding column 5: DIRECT + Encoding column 6: DIRECT + Encoding column 7: DIRECT + Encoding column 8: DICTIONARY_V2[516] + Encoding column 9: DIRECT_V2 + Encoding column 10: DIRECT_V2 + Encoding column 11: DIRECT_V2 + Row group indices for column 1: + Entry 0: count: 997 hasNull: true min: -3 max: 124 sum: 59325 positions: 0,0,0,0,0,0,0 + Entry 1: count: 49 hasNull: false min: 2 max: 123 sum: 3105 positions: 0,10,113,0,0,903,101 + Bloom filters for column 1: + Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5 + Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 0.0268 expectedFpp: 5.147697E-7 + Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5 + +File length: 33456 bytes +Padding length: 0 bytes +Padding ratio: 0% +-- END ORC FILE DUMP -- +124 336 65664 4294967435 74.72 42.47 true bob davidson 2013-03-01 09:11:58.703302 45.4 yard duty +PREHOOK: query: alter table orc_ppd set tblproperties("orc.bloom.filter.fpp"="0.01") +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: insert overwrite table orc_ppd select * from staging +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select * from orc_ppd limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +-- BEGIN ORC FILE DUMP -- +#### A masked pattern was here #### +File Version: 0.12 with HIVE_8732 +Rows: 1049 +Compression: ZLIB +Compression size: 262144 +Type: struct<_col0:tinyint,_col1:smallint,_col2:int,_col3:bigint,_col4:float,_col5:double,_col6:boolean,_col7:string,_col8:timestamp,_col9:decimal(4,2),_col10:binary> + +Stripe Statistics: + Stripe 1: + Column 0: count: 1049 hasNull: false + Column 1: count: 1046 hasNull: true min: -3 max: 124 sum: 62430 + Column 2: count: 1046 hasNull: true min: 256 max: 511 sum: 398889 + Column 3: count: 1049 hasNull: false min: 65536 max: 65791 sum: 68881051 + Column 4: count: 1049 hasNull: false min: 4294967296 max: 4294967551 sum: 4505420825953 + Column 5: count: 1049 hasNull: false min: 0.07999999821186066 max: 99.91999816894531 sum: 52744.70002820343 + Column 6: count: 1049 hasNull: false min: 0.02 max: 49.85 sum: 26286.349999999977 + Column 7: count: 1049 hasNull: false true: 526 + Column 8: count: 1049 hasNull: false min: max: zach zipper sum: 13443 + Column 9: count: 1049 hasNull: false min: 2013-03-01 09:11:58.703 max: 2013-03-01 09:11:58.703 + Column 10: count: 1049 hasNull: false min: 0.08 max: 99.94 sum: 53646.16 + Column 11: count: 1049 hasNull: false sum: 13278 + +File Statistics: + Column 0: count: 1049 hasNull: false + Column 1: count: 1046 hasNull: true min: -3 max: 124 sum: 62430 + Column 2: count: 1046 hasNull: true min: 256 max: 511 sum: 398889 + Column 3: count: 1049 hasNull: false min: 65536 max: 65791 sum: 68881051 + Column 4: count: 1049 hasNull: false min: 4294967296 max: 4294967551 sum: 4505420825953 + Column 5: count: 1049 hasNull: false min: 0.07999999821186066 max: 99.91999816894531 sum: 52744.70002820343 + Column 6: count: 1049 hasNull: false min: 0.02 max: 49.85 sum: 26286.349999999977 + Column 7: count: 1049 hasNull: false true: 526 + Column 8: count: 1049 hasNull: false min: max: zach zipper sum: 13443 + Column 9: count: 1049 hasNull: false min: 2013-03-01 09:11:58.703 max: 2013-03-01 09:11:58.703 + Column 10: count: 1049 hasNull: false min: 0.08 max: 99.94 sum: 53646.16 + Column 11: count: 1049 hasNull: false sum: 13278 + +Stripes: + Stripe: offset: 3 data: 22636 rows: 1049 tail: 251 index: 15096 + Stream: column 0 section ROW_INDEX start: 3 length 20 + Stream: column 0 section BLOOM_FILTER start: 23 length 56 + Stream: column 1 section ROW_INDEX start: 79 length 58 + Stream: column 1 section BLOOM_FILTER start: 137 length 1258 + Stream: column 2 section ROW_INDEX start: 1395 length 58 + Stream: column 2 section BLOOM_FILTER start: 1453 length 1544 + Stream: column 3 section ROW_INDEX start: 2997 length 61 + Stream: column 3 section BLOOM_FILTER start: 3058 length 1543 + Stream: column 4 section ROW_INDEX start: 4601 length 69 + Stream: column 4 section BLOOM_FILTER start: 4670 length 1556 + Stream: column 5 section ROW_INDEX start: 6226 length 78 + Stream: column 5 section BLOOM_FILTER start: 6304 length 1991 + Stream: column 6 section ROW_INDEX start: 8295 length 85 + Stream: column 6 section BLOOM_FILTER start: 8380 length 1964 + Stream: column 7 section ROW_INDEX start: 10344 length 41 + Stream: column 7 section BLOOM_FILTER start: 10385 length 56 + Stream: column 8 section ROW_INDEX start: 10441 length 86 + Stream: column 8 section BLOOM_FILTER start: 10527 length 1829 + Stream: column 9 section ROW_INDEX start: 12356 length 51 + Stream: column 9 section BLOOM_FILTER start: 12407 length 95 + Stream: column 10 section ROW_INDEX start: 12502 length 82 + Stream: column 10 section BLOOM_FILTER start: 12584 length 1994 + Stream: column 11 section ROW_INDEX start: 14578 length 47 + Stream: column 11 section BLOOM_FILTER start: 14625 length 474 + Stream: column 1 section PRESENT start: 15099 length 17 + Stream: column 1 section DATA start: 15116 length 962 + Stream: column 2 section PRESENT start: 16078 length 17 + Stream: column 2 section DATA start: 16095 length 1441 + Stream: column 3 section DATA start: 17536 length 1704 + Stream: column 4 section DATA start: 19240 length 1998 + Stream: column 5 section DATA start: 21238 length 2925 + Stream: column 6 section DATA start: 24163 length 3323 + Stream: column 7 section DATA start: 27486 length 137 + Stream: column 8 section DATA start: 27623 length 1572 + Stream: column 8 section LENGTH start: 29195 length 310 + Stream: column 8 section DICTIONARY_DATA start: 29505 length 1548 + Stream: column 9 section DATA start: 31053 length 62 + Stream: column 9 section SECONDARY start: 31115 length 1783 + Stream: column 10 section DATA start: 32898 length 2138 + Stream: column 10 section SECONDARY start: 35036 length 231 + Stream: column 11 section DATA start: 35267 length 1877 + Stream: column 11 section LENGTH start: 37144 length 591 + Encoding column 0: DIRECT + Encoding column 1: DIRECT + Encoding column 2: DIRECT_V2 + Encoding column 3: DIRECT_V2 + Encoding column 4: DIRECT_V2 + Encoding column 5: DIRECT + Encoding column 6: DIRECT + Encoding column 7: DIRECT + Encoding column 8: DICTIONARY_V2[516] + Encoding column 9: DIRECT_V2 + Encoding column 10: DIRECT_V2 + Encoding column 11: DIRECT_V2 + Row group indices for column 1: + Entry 0: count: 997 hasNull: true min: -3 max: 124 sum: 59325 positions: 0,0,0,0,0,0,0 + Entry 1: count: 49 hasNull: false min: 2 max: 123 sum: 3105 positions: 0,10,113,0,0,903,101 + Bloom filters for column 1: + Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 849 loadFactor: 0.0884 expectedFpp: 4.231118E-8 + Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 285 loadFactor: 0.0297 expectedFpp: 2.0324289E-11 + Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 849 loadFactor: 0.0884 expectedFpp: 4.231118E-8 + +File length: 38610 bytes +Padding length: 0 bytes +Padding ratio: 0% +-- END ORC FILE DUMP -- +124 336 65664 4294967435 74.72 42.47 true bob davidson 2013-03-01 09:11:58.703302 45.4 yard duty +PREHOOK: query: CREATE TABLE orc_ppd_part(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +PARTITIONED BY (ds string, hr int) STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_ppd_part +PREHOOK: query: insert overwrite table orc_ppd_part partition(ds = "2015", hr = 10) select * from staging +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd_part@ds=2015/hr=10 +PREHOOK: query: select * from orc_ppd_part limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd_part +PREHOOK: Input: default@orc_ppd_part@ds=2015/hr=10 +#### A masked pattern was here #### +-- BEGIN ORC FILE DUMP -- +#### A masked pattern was here #### +File Version: 0.12 with HIVE_8732 +Rows: 1049 +Compression: ZLIB +Compression size: 262144 +Type: struct<_col0:tinyint,_col1:smallint,_col2:int,_col3:bigint,_col4:float,_col5:double,_col6:boolean,_col7:string,_col8:timestamp,_col9:decimal(4,2),_col10:binary> + +Stripe Statistics: + Stripe 1: + Column 0: count: 1049 hasNull: false + Column 1: count: 1046 hasNull: true min: -3 max: 124 sum: 62430 + Column 2: count: 1046 hasNull: true min: 256 max: 511 sum: 398889 + Column 3: count: 1049 hasNull: false min: 65536 max: 65791 sum: 68881051 + Column 4: count: 1049 hasNull: false min: 4294967296 max: 4294967551 sum: 4505420825953 + Column 5: count: 1049 hasNull: false min: 0.07999999821186066 max: 99.91999816894531 sum: 52744.70002820343 + Column 6: count: 1049 hasNull: false min: 0.02 max: 49.85 sum: 26286.349999999977 + Column 7: count: 1049 hasNull: false true: 526 + Column 8: count: 1049 hasNull: false min: max: zach zipper sum: 13443 + Column 9: count: 1049 hasNull: false min: 2013-03-01 09:11:58.703 max: 2013-03-01 09:11:58.703 + Column 10: count: 1049 hasNull: false min: 0.08 max: 99.94 sum: 53646.16 + Column 11: count: 1049 hasNull: false sum: 13278 + +File Statistics: + Column 0: count: 1049 hasNull: false + Column 1: count: 1046 hasNull: true min: -3 max: 124 sum: 62430 + Column 2: count: 1046 hasNull: true min: 256 max: 511 sum: 398889 + Column 3: count: 1049 hasNull: false min: 65536 max: 65791 sum: 68881051 + Column 4: count: 1049 hasNull: false min: 4294967296 max: 4294967551 sum: 4505420825953 + Column 5: count: 1049 hasNull: false min: 0.07999999821186066 max: 99.91999816894531 sum: 52744.70002820343 + Column 6: count: 1049 hasNull: false min: 0.02 max: 49.85 sum: 26286.349999999977 + Column 7: count: 1049 hasNull: false true: 526 + Column 8: count: 1049 hasNull: false min: max: zach zipper sum: 13443 + Column 9: count: 1049 hasNull: false min: 2013-03-01 09:11:58.703 max: 2013-03-01 09:11:58.703 + Column 10: count: 1049 hasNull: false min: 0.08 max: 99.94 sum: 53646.16 + Column 11: count: 1049 hasNull: false sum: 13278 + +Stripes: + Stripe: offset: 3 data: 22636 rows: 1049 tail: 249 index: 9944 + Stream: column 0 section ROW_INDEX start: 3 length 20 + Stream: column 0 section BLOOM_FILTER start: 23 length 45 + Stream: column 1 section ROW_INDEX start: 68 length 58 + Stream: column 1 section BLOOM_FILTER start: 126 length 799 + Stream: column 2 section ROW_INDEX start: 925 length 58 + Stream: column 2 section BLOOM_FILTER start: 983 length 978 + Stream: column 3 section ROW_INDEX start: 1961 length 61 + Stream: column 3 section BLOOM_FILTER start: 2022 length 983 + Stream: column 4 section ROW_INDEX start: 3005 length 69 + Stream: column 4 section BLOOM_FILTER start: 3074 length 963 + Stream: column 5 section ROW_INDEX start: 4037 length 78 + Stream: column 5 section BLOOM_FILTER start: 4115 length 1291 + Stream: column 6 section ROW_INDEX start: 5406 length 85 + Stream: column 6 section BLOOM_FILTER start: 5491 length 1280 + Stream: column 7 section ROW_INDEX start: 6771 length 41 + Stream: column 7 section BLOOM_FILTER start: 6812 length 45 + Stream: column 8 section ROW_INDEX start: 6857 length 86 + Stream: column 8 section BLOOM_FILTER start: 6943 length 1157 + Stream: column 9 section ROW_INDEX start: 8100 length 51 + Stream: column 9 section BLOOM_FILTER start: 8151 length 62 + Stream: column 10 section ROW_INDEX start: 8213 length 82 + Stream: column 10 section BLOOM_FILTER start: 8295 length 1297 + Stream: column 11 section ROW_INDEX start: 9592 length 47 + Stream: column 11 section BLOOM_FILTER start: 9639 length 308 + Stream: column 1 section PRESENT start: 9947 length 17 + Stream: column 1 section DATA start: 9964 length 962 + Stream: column 2 section PRESENT start: 10926 length 17 + Stream: column 2 section DATA start: 10943 length 1441 + Stream: column 3 section DATA start: 12384 length 1704 + Stream: column 4 section DATA start: 14088 length 1998 + Stream: column 5 section DATA start: 16086 length 2925 + Stream: column 6 section DATA start: 19011 length 3323 + Stream: column 7 section DATA start: 22334 length 137 + Stream: column 8 section DATA start: 22471 length 1572 + Stream: column 8 section LENGTH start: 24043 length 310 + Stream: column 8 section DICTIONARY_DATA start: 24353 length 1548 + Stream: column 9 section DATA start: 25901 length 62 + Stream: column 9 section SECONDARY start: 25963 length 1783 + Stream: column 10 section DATA start: 27746 length 2138 + Stream: column 10 section SECONDARY start: 29884 length 231 + Stream: column 11 section DATA start: 30115 length 1877 + Stream: column 11 section LENGTH start: 31992 length 591 + Encoding column 0: DIRECT + Encoding column 1: DIRECT + Encoding column 2: DIRECT_V2 + Encoding column 3: DIRECT_V2 + Encoding column 4: DIRECT_V2 + Encoding column 5: DIRECT + Encoding column 6: DIRECT + Encoding column 7: DIRECT + Encoding column 8: DICTIONARY_V2[516] + Encoding column 9: DIRECT_V2 + Encoding column 10: DIRECT_V2 + Encoding column 11: DIRECT_V2 + Row group indices for column 1: + Entry 0: count: 997 hasNull: true min: -3 max: 124 sum: 59325 positions: 0,0,0,0,0,0,0 + Entry 1: count: 49 hasNull: false min: 2 max: 123 sum: 3105 positions: 0,10,113,0,0,903,101 + Bloom filters for column 1: + Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5 + Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 0.0268 expectedFpp: 5.147697E-7 + Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5 + +File length: 33456 bytes +Padding length: 0 bytes +Padding ratio: 0% +-- END ORC FILE DUMP -- +124 336 65664 4294967435 74.72 42.47 true bob davidson 2013-03-01 09:11:58.703302 45.4 yard duty 2015 10