diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index f356663..e5f40e6 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -184,6 +184,7 @@ minitez.query.files.shared=acid_globallimit.q,\
   orc_ppd_schema_evol_1b.q,\
   orc_ppd_schema_evol_2a.q,\
   orc_ppd_schema_evol_2b.q,\
+  orc_ppd_schema_evol_3a.q,\
   orc_vectorization_ppd.q,\
   parallel.q,\
   ptf.q,\
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
index 1dcd2cd..93c40e4 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
@@ -26,9 +26,11 @@ import java.util.List;
 
 import org.apache.hadoop.hive.llap.counters.LlapIOCounters;
-import org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics;
+import org.apache.orc.OrcUtils;
+import org.apache.orc.TypeDescription;
 import org.apache.orc.impl.DataReaderProperties;
 import org.apache.orc.impl.OrcIndex;
+import org.apache.orc.impl.SchemaEvolution;
 import org.apache.tez.common.counters.TezCounters;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -704,8 +706,10 @@ private boolean determineRgsToRead(boolean[] globalIncludes, int rowIndexStride,
       List<OrcProto.Type> types = fileMetadata.getTypes();
       String[] colNamesForSarg = OrcInputFormat.getSargColumnNames(
           columnNames, types, globalIncludes, fileMetadata.isOriginalFormat());
+      TypeDescription schema = OrcUtils.convertTypeFromProtobuf(types, 0);
+      SchemaEvolution schemaEvolution = new SchemaEvolution(schema, globalIncludes);
       sargApp = new RecordReaderImpl.SargApplier(sarg, colNamesForSarg,
-          rowIndexStride, types, globalIncludes.length);
+          rowIndexStride, globalIncludes.length, schemaEvolution);
     }
     boolean hasAnyData = false;
     // readState should have been initialized by this time with an empty array.
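Reviewer note: the hunk above shows the wiring pattern this patch repeats wherever a
SargApplier is built — reconstruct the file's TypeDescription from the protobuf footer,
wrap it in a SchemaEvolution, and hand that to the applier. A minimal sketch of the same
pattern in isolation (the variable names `types`, `globalIncludes`, `sarg`,
`colNamesForSarg`, `rowIndexStride` come from the hunk; the enclosing method is assumed):

    // File schema rebuilt from the ORC footer's protobuf type list, rooted at column 0.
    TypeDescription schema = OrcUtils.convertTypeFromProtobuf(types, 0);
    // No reader schema supplied: identity mapping, so every column is PPD-safe.
    SchemaEvolution schemaEvolution = new SchemaEvolution(schema, globalIncludes);
    // SargApplier now consults schemaEvolution.isPPDSafeConversion(col) per predicate
    // instead of needing the raw protobuf types.
    sargApp = new RecordReaderImpl.SargApplier(sarg, colNamesForSarg,
        rowIndexStride, globalIncludes.length, schemaEvolution);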
diff --git a/orc/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/orc/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
index 03378a9..c347181 100644
--- a/orc/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ b/orc/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -2788,8 +2788,7 @@ public static TreeReader createConvertTreeReader(TypeDescription readerType,
     }
   }
 
-  public static boolean canConvert(TypeDescription fileType, TypeDescription readerType)
-      throws IOException {
+  public static boolean canConvert(TypeDescription fileType, TypeDescription readerType) {
 
     Category readerTypeCategory = readerType.getCategory();
diff --git a/orc/src/java/org/apache/orc/impl/RecordReaderImpl.java b/orc/src/java/org/apache/orc/impl/RecordReaderImpl.java
index eb43ed6..92b6a8b 100644
--- a/orc/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/orc/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -44,8 +44,6 @@ import org.apache.orc.TimestampColumnStatistics;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.io.DiskRange;
 import org.apache.hadoop.hive.common.io.DiskRangeList;
@@ -164,21 +162,11 @@ protected RecordReaderImpl(ReaderImpl fileReader,
     this.bufferSize = fileReader.bufferSize;
     this.rowIndexStride = fileReader.rowIndexStride;
     SearchArgument sarg = options.getSearchArgument();
-    // We want to use the sarg for predicate evaluation but we have data type conversion
-    // (i.e Schema Evolution), so we currently ignore it.
-    if (sarg != null && rowIndexStride != 0 && !evolution.hasConversion()) {
-      sargApp = new SargApplier(
-          sarg, options.getColumnNames(), rowIndexStride, types,
-          included.length);
+    if (sarg != null && rowIndexStride != 0) {
+      sargApp = new SargApplier(sarg, options.getColumnNames(), rowIndexStride,
+          included.length, evolution);
     } else {
       sargApp = null;
-      if (evolution.hasConversion()) {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug(
-              "Skipping stripe elimination for {} since the schema has data type conversion",
-              fileReader.path);
-        }
-      }
     }
     long rows = 0;
     long skippedRows = 0;
@@ -720,9 +708,10 @@ private static Object getBaseObjectForComparison(PredicateLeaf.Type type, Object
     private final long rowIndexStride;
     // same as the above array, but indices are set to true
     private final boolean[] sargColumns;
+    private SchemaEvolution evolution;
 
     public SargApplier(SearchArgument sarg, String[] columnNames, long rowIndexStride,
-        List<OrcProto.Type> types, int includedCount) {
+        int includedCount, final SchemaEvolution evolution) {
       this.sarg = sarg;
       sargLeaves = sarg.getLeaves();
       filterColumns = mapSargColumnsToOrcInternalColIdx(sargLeaves, columnNames, 0);
@@ -735,6 +724,7 @@ public SargApplier(SearchArgument sarg, String[] columnNames, long rowIndexStrid
           sargColumns[i] = true;
         }
       }
+      this.evolution = evolution;
     }
 
     /**
@@ -764,10 +754,14 @@ public SargApplier(SearchArgument sarg, String[] columnNames, long rowIndexStrid
         }
         OrcProto.ColumnStatistics stats = entry.getStatistics();
         OrcProto.BloomFilter bf = null;
-        if (bloomFilterIndices != null && bloomFilterIndices[filterColumns[pred]] != null) {
-          bf = bloomFilterIndices[filterColumns[pred]].getBloomFilter(rowGroup);
+        if (bloomFilterIndices != null && bloomFilterIndices[columnIx] != null) {
+          bf = bloomFilterIndices[columnIx].getBloomFilter(rowGroup);
+        }
+        if (evolution != null && evolution.isPPDSafeConversion(columnIx)) {
+          leafValues[pred] = evaluatePredicateProto(stats, sargLeaves.get(pred), bf);
+        } else {
+          leafValues[pred] = TruthValue.YES_NO_NULL;
         }
-        leafValues[pred] = evaluatePredicateProto(stats, sargLeaves.get(pred), bf);
         if (LOG.isTraceEnabled()) {
           LOG.trace("Stats = " + stats);
           LOG.trace("Setting " + sargLeaves.get(pred) + " to " + leafValues[pred]);
diff --git a/orc/src/java/org/apache/orc/impl/SchemaEvolution.java b/orc/src/java/org/apache/orc/impl/SchemaEvolution.java
index ce3af7a..7379de9 100644
--- a/orc/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/orc/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -18,13 +18,10 @@
 package org.apache.orc.impl;
 
-import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.orc.TypeDescription;
 
 /**
@@ -32,35 +29,40 @@
  * has been schema evolution.
  */
 public class SchemaEvolution {
+  // indexed by reader column id
   private final TypeDescription[] readerFileTypes;
+  // indexed by reader column id
   private final boolean[] included;
+  private final TypeDescription fileSchema;
   private final TypeDescription readerSchema;
   private boolean hasConversion;
-  private static final Log LOG = LogFactory.getLog(SchemaEvolution.class);
+  // indexed by reader column id
+  private final boolean[] ppdSafeConversion;
 
-  public SchemaEvolution(TypeDescription readerSchema, boolean[] included) {
-    this.included = (included == null ? null : Arrays.copyOf(included, included.length));
-    this.readerSchema = readerSchema;
-
-    hasConversion = false;
-
-    readerFileTypes = new TypeDescription[this.readerSchema.getMaximumId() + 1];
-    buildSameSchemaFileTypesArray();
+  public SchemaEvolution(TypeDescription fileSchema, boolean[] includedCols) {
+    this(fileSchema, null, includedCols);
   }
 
   public SchemaEvolution(TypeDescription fileSchema, TypeDescription readerSchema,
-                         boolean[] included) throws IOException {
-    this.included = (included == null ? null : Arrays.copyOf(included, included.length));
-    if (checkAcidSchema(fileSchema)) {
-      this.readerSchema = createEventSchema(readerSchema);
+                         boolean[] includeCols) {
+    this.included = includeCols == null ? null : Arrays.copyOf(includeCols, includeCols.length);
+    this.hasConversion = false;
+    this.fileSchema = fileSchema;
+    if (readerSchema != null) {
+      if (checkAcidSchema(fileSchema)) {
+        this.readerSchema = createEventSchema(readerSchema);
+      } else {
+        this.readerSchema = readerSchema;
+      }
+      this.readerFileTypes = new TypeDescription[this.readerSchema.getMaximumId() + 1];
+      buildConversionFileTypesArray(fileSchema, this.readerSchema);
     } else {
-      this.readerSchema = readerSchema;
+      this.readerSchema = fileSchema;
+      this.readerFileTypes = new TypeDescription[this.readerSchema.getMaximumId() + 1];
+      buildSameSchemaFileTypesArray();
     }
-
-    hasConversion = false;
-    readerFileTypes = new TypeDescription[this.readerSchema.getMaximumId() + 1];
-    buildConversionFileTypesArray(fileSchema, this.readerSchema);
+    this.ppdSafeConversion = populatePpdSafeConversion();
   }
 
   public TypeDescription getReaderSchema() {
@@ -81,15 +83,114 @@ public TypeDescription getFileType(TypeDescription readerType) {
 
   /**
    * Get the file type by reader type id.
-   * @param readerType
+   * @param id reader column id
    * @return
    */
   public TypeDescription getFileType(int id) {
     return readerFileTypes[id];
   }
 
+  /**
+   * Check if column is safe for ppd evaluation
+   * @param colId reader column id
+   * @return true if the specified column is safe for ppd evaluation else false
+   */
+  public boolean isPPDSafeConversion(final int colId) {
+    if (hasConversion()) {
+      if (colId < 0 || colId >= ppdSafeConversion.length) {
+        return false;
+      }
+      return ppdSafeConversion[colId];
+    }
+
+    // when there is no schema evolution PPD is safe
+    return true;
+  }
+
+  private boolean[] populatePpdSafeConversion() {
+    if (fileSchema == null || readerSchema == null || readerFileTypes == null) {
+      return null;
+    }
+
+    boolean[] result = new boolean[readerSchema.getMaximumId() + 1];
+    boolean safePpd = validatePPDConversion(fileSchema, readerSchema);
+    result[readerSchema.getId()] = safePpd;
+    List<TypeDescription> children = readerSchema.getChildren();
+    if (children != null) {
+      for (TypeDescription child : children) {
+        TypeDescription fileType = getFileType(child.getId());
+        safePpd = validatePPDConversion(fileType, child);
+        result[child.getId()] = safePpd;
+      }
+    }
+    return result;
+  }
+
+  private boolean validatePPDConversion(final TypeDescription fileType,
+      final TypeDescription readerType) {
+    if (fileType == null) {
+      return false;
+    }
+    if (fileType.getCategory().isPrimitive()) {
+      if (fileType.getCategory().equals(readerType.getCategory())) {
+        // for decimals alone do equality check to not mess up with precision change
+        if (fileType.getCategory().equals(TypeDescription.Category.DECIMAL) &&
+            !fileType.equals(readerType)) {
+          return false;
+        }
+        return true;
+      }
+
+      // only integer and string evolutions are safe
+      // byte -> short -> int -> long
+      // string <-> char <-> varchar
+      // NOTE: Float to double evolution is not safe as floats are stored as doubles in ORC's
+      // internal index, but when doing predicate evaluation for queries like "select * from
+      // orc_float where f = 74.72" the constant on the filter is converted from string -> double
+      // so the precisions will be different and the comparison will fail.
+      // Soon, we should convert all sargs that compare equality between floats or
+      // doubles to range predicates.
+
+      // Similarly string -> char and varchar -> char and vice versa is not possible, as ORC stores
+      // char with padded spaces in its internal index.
+      switch (fileType.getCategory()) {
+        case BYTE:
+          if (readerType.getCategory().equals(TypeDescription.Category.SHORT) ||
+              readerType.getCategory().equals(TypeDescription.Category.INT) ||
+              readerType.getCategory().equals(TypeDescription.Category.LONG)) {
+            return true;
+          }
+          break;
+        case SHORT:
+          if (readerType.getCategory().equals(TypeDescription.Category.INT) ||
+              readerType.getCategory().equals(TypeDescription.Category.LONG)) {
+            return true;
+          }
+          break;
+        case INT:
+          if (readerType.getCategory().equals(TypeDescription.Category.LONG)) {
+            return true;
+          }
+          break;
+        case STRING:
+          if (readerType.getCategory().equals(TypeDescription.Category.VARCHAR)) {
+            return true;
+          }
+          break;
+        case VARCHAR:
+          if (readerType.getCategory().equals(TypeDescription.Category.STRING)) {
+            return true;
+          }
+          break;
+        default:
+          break;
+      }
+    }
+    return false;
+  }
+
   void buildConversionFileTypesArray(TypeDescription fileType,
-                                     TypeDescription readerType) throws IOException {
+                                     TypeDescription readerType) {
     // if the column isn't included, don't map it
     if (included != null && !included[readerType.getId()]) {
       return;
@@ -171,7 +272,7 @@ void buildConversionFileTypesArray(TypeDescription fileType,
       }
       readerFileTypes[id] = fileType;
     } else {
-      throw new IOException(
+      throw new IllegalArgumentException(
          String.format(
              "ORC does not support type conversion from file type %s (%d) to reader type %s (%d)",
              fileType.toString(), fileType.getId(),
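Reviewer note: the NOTE inside validatePPDConversion is easy to verify in isolation. A
self-contained sketch (class name is mine, not part of the patch) of why float -> double
evolution stays PPD-unsafe: ORC's row index stores float min/max widened to double, while
the SARG literal for "f = 74.72" is parsed from the query string as a double, so the two
doubles never compare equal.

    public class FloatPpdHazard {
      public static void main(String[] args) {
        float stored = 74.72f;                            // value written to the ORC file
        double statsValue = (double) stored;              // what the row index records
        double sargLiteral = Double.parseDouble("74.72"); // literal from the query
        System.out.println(statsValue);                   // 74.72000122070312
        System.out.println(sargLiteral);                  // 74.72
        System.out.println(statsValue == sargLiteral);    // false -> row group wrongly pruned
      }
    }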
+ .addField("f1", TypeDescription.createInt()) + .addField("f2", TypeDescription.createString()) + .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10)); + SchemaEvolution both1 = new SchemaEvolution(fileStruct1, readerStruct1, null); + assertFalse(both1.hasConversion()); + assertTrue(both1.isPPDSafeConversion(0)); + assertTrue(both1.isPPDSafeConversion(1)); + assertTrue(both1.isPPDSafeConversion(2)); + assertTrue(both1.isPPDSafeConversion(3)); + + // int -> long + TypeDescription readerStruct1diff = TypeDescription.createStruct() + .addField("f1", TypeDescription.createLong()) + .addField("f2", TypeDescription.createString()) + .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10)); + SchemaEvolution both1diff = new SchemaEvolution(fileStruct1, readerStruct1diff, null); + assertTrue(both1diff.hasConversion()); + assertFalse(both1diff.isPPDSafeConversion(0)); + assertTrue(both1diff.isPPDSafeConversion(1)); + assertTrue(both1diff.isPPDSafeConversion(2)); + assertTrue(both1diff.isPPDSafeConversion(3)); + + // decimal(38,10) -> decimal(12, 10) + TypeDescription readerStruct1diffPrecision = TypeDescription.createStruct() + .addField("f1", TypeDescription.createInt()) + .addField("f2", TypeDescription.createString()) + .addField("f3", TypeDescription.createDecimal().withPrecision(12).withScale(10)); + SchemaEvolution both1diffPrecision = new SchemaEvolution(fileStruct1, readerStruct1diffPrecision, + new boolean[] {true, false, false, true}); + assertTrue(both1diffPrecision.hasConversion()); + assertFalse(both1diffPrecision.isPPDSafeConversion(0)); + assertFalse(both1diffPrecision.isPPDSafeConversion(1)); // column not included + assertFalse(both1diffPrecision.isPPDSafeConversion(2)); // column not included + assertFalse(both1diffPrecision.isPPDSafeConversion(3)); + + // add columns + readerStruct1 = TypeDescription.createStruct() + .addField("f1", TypeDescription.createInt()) + .addField("f2", TypeDescription.createString()) + .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10)) + .addField("f4", TypeDescription.createBoolean()); + both1 = new SchemaEvolution(fileStruct1, readerStruct1, null); + assertTrue(both1.hasConversion()); + assertFalse(both1.isPPDSafeConversion(0)); + assertTrue(both1.isPPDSafeConversion(1)); + assertTrue(both1.isPPDSafeConversion(2)); + assertTrue(both1.isPPDSafeConversion(3)); + assertFalse(both1.isPPDSafeConversion(4)); + } + + @Test + public void testSafePpdEvaluationForInts() throws IOException { + // byte -> short -> int -> long + TypeDescription fileSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createByte()); + SchemaEvolution schemaEvolution = new SchemaEvolution(fileSchema, null); + assertFalse(schemaEvolution.hasConversion()); + + // byte -> short + TypeDescription readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createShort()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertTrue(schemaEvolution.isPPDSafeConversion(1)); + + // byte -> int + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createInt()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertTrue(schemaEvolution.isPPDSafeConversion(1)); + + // byte -> long + 
readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createLong()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertTrue(schemaEvolution.isPPDSafeConversion(1)); + + // short -> int -> long + fileSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createShort()); + schemaEvolution = new SchemaEvolution(fileSchema, null); + assertFalse(schemaEvolution.hasConversion()); + + // unsafe conversion short -> byte + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createByte()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.isPPDSafeConversion(1)); + + // short -> int + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createInt()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertTrue(schemaEvolution.isPPDSafeConversion(1)); + + // short -> long + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createLong()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertTrue(schemaEvolution.isPPDSafeConversion(1)); + + // int -> long + fileSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createInt()); + schemaEvolution = new SchemaEvolution(fileSchema, null); + assertFalse(schemaEvolution.hasConversion()); + + // unsafe conversion int -> byte + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createByte()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.isPPDSafeConversion(1)); + + // unsafe conversion int -> short + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createShort()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.isPPDSafeConversion(1)); + + // int -> long + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createLong()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertTrue(schemaEvolution.isPPDSafeConversion(1)); + + // long + fileSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createLong()); + schemaEvolution = new SchemaEvolution(fileSchema, null); + assertTrue(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.hasConversion()); + + // unsafe conversion long -> byte + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createByte()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.isPPDSafeConversion(1)); + + // unsafe 
conversion long -> short + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createShort()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.isPPDSafeConversion(1)); + + // unsafe conversion long -> int + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createInt()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.isPPDSafeConversion(1)); + + // invalid + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createString()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.isPPDSafeConversion(1)); + + // invalid + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createFloat()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.isPPDSafeConversion(1)); + + // invalid + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createTimestamp()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.isPPDSafeConversion(1)); + } + + @Test + public void testSafePpdEvaluationForStrings() throws IOException { + TypeDescription fileSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createString()); + SchemaEvolution schemaEvolution = new SchemaEvolution(fileSchema, null); + assertTrue(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.hasConversion()); + + // string -> char + TypeDescription readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createChar()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.isPPDSafeConversion(1)); + + // string -> varchar + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createVarchar()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertTrue(schemaEvolution.isPPDSafeConversion(1)); + + fileSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createChar()); + schemaEvolution = new SchemaEvolution(fileSchema, null); + assertTrue(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.hasConversion()); + + // char -> string + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createString()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.isPPDSafeConversion(1)); + + // char -> varchar + readerSchema = TypeDescription.createStruct() + .addField("f1", 
TypeDescription.createVarchar()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.isPPDSafeConversion(1)); + + fileSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createVarchar()); + schemaEvolution = new SchemaEvolution(fileSchema, null); + assertTrue(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.hasConversion()); + + // varchar -> string + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createString()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertTrue(schemaEvolution.isPPDSafeConversion(1)); + + // varchar -> char + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createChar()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.isPPDSafeConversion(1)); + + // invalid + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createDecimal()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.isPPDSafeConversion(1)); + + // invalid + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createDate()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.isPPDSafeConversion(1)); + + // invalid + readerSchema = TypeDescription.createStruct() + .addField("f1", TypeDescription.createInt()); + schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null); + assertTrue(schemaEvolution.hasConversion()); + assertFalse(schemaEvolution.isPPDSafeConversion(0)); + assertFalse(schemaEvolution.isPPDSafeConversion(1)); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 63d02fb..0a2c3fa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -99,7 +99,6 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; -import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -1272,24 +1271,14 @@ public String toString() { // We can't eliminate stripes if there are deltas because the // deltas may change the rows making them match the predicate. if ((deltas == null || deltas.isEmpty()) && context.sarg != null) { - // Also, we currently do not use predicate evaluation when the schema has data type - // conversion. 
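Reviewer note: the column ids these tests pass to isPPDSafeConversion are TypeDescription
ids: the root struct is id 0 and its fields are ids 1..n. A quick sketch of the mapping
(class name is mine, not part of the patch):

    import org.apache.orc.TypeDescription;

    public class ColumnIdDemo {
      public static void main(String[] args) {
        TypeDescription s = TypeDescription.createStruct()
            .addField("f1", TypeDescription.createInt())
            .addField("f2", TypeDescription.createString());
        System.out.println(s.getId());          // 0 (the root struct)
        for (TypeDescription child : s.getChildren()) {
          System.out.println(child.getId());    // 1, 2
        }
      }
    }

The root is a STRUCT, never a primitive, so validatePPDConversion reports it unsafe as
soon as any conversion exists — hence the recurring assertFalse(...isPPDSafeConversion(0))
above. With no conversion at all, isPPDSafeConversion short-circuits to true for every id.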
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 63d02fb..0a2c3fa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -99,7 +99,6 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
-import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.SerDeStats;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -1272,24 +1271,14 @@ public String toString() {
       // We can't eliminate stripes if there are deltas because the
       // deltas may change the rows making them match the predicate.
       if ((deltas == null || deltas.isEmpty()) && context.sarg != null) {
-        // Also, we currently do not use predicate evaluation when the schema has data type
-        // conversion.
-        if (evolution.hasConversion()) {
-          if (LOG.isDebugEnabled()) {
-            LOG.debug(
-                "Skipping split elimination for {} since the schema has data type conversion",
-                file.getPath());
-          }
+        String[] colNames =
+            extractNeededColNames((readerTypes == null ? fileTypes : readerTypes),
+                context.conf, readerIncluded, isOriginal);
+        if (colNames == null) {
+          LOG.warn("Skipping split elimination for {} as column names is null", file.getPath());
         } else {
-          String[] colNames =
-              extractNeededColNames((readerTypes == null ? fileTypes : readerTypes),
-                  context.conf, readerIncluded, isOriginal);
-          if (colNames == null) {
-            LOG.warn("Skipping split elimination for {} as column names is null", file.getPath());
-          } else {
-            includeStripe = pickStripes(context.sarg, colNames, writerVersion, isOriginal,
-                stripeStats, stripes.size(), file.getPath());
-          }
+          includeStripe = pickStripes(context.sarg, colNames, writerVersion, isOriginal,
+              stripeStats, stripes.size(), file.getPath(), evolution);
         }
       }
       return generateSplitsFromStripes(includeStripe);
@@ -1901,12 +1890,14 @@ static Path findOriginalBucket(FileSystem fs,
     // eliminate stripes that doesn't satisfy the predicate condition
     List<PredicateLeaf> sargLeaves = sarg.getLeaves();
     int[] filterColumns = RecordReaderImpl.mapTranslatedSargColumns(types, sargLeaves);
-    return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, null);
+    TypeDescription schema = OrcUtils.convertTypeFromProtobuf(types, 0);
+    SchemaEvolution evolution = new SchemaEvolution(schema, null);
+    return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, null, evolution);
   }
 
   private static boolean[] pickStripes(SearchArgument sarg, String[] sargColNames,
       OrcFile.WriterVersion writerVersion, boolean isOriginal, List<StripeStatistics> stripeStats,
-      int stripeCount, Path filePath) {
+      int stripeCount, Path filePath, final SchemaEvolution evolution) {
     if (sarg == null || stripeStats == null || writerVersion == OrcFile.WriterVersion.ORIGINAL) {
       return null; // only do split pruning if HIVE-8732 has been fixed in the writer
     }
@@ -1914,15 +1905,16 @@ static Path findOriginalBucket(FileSystem fs,
     List<PredicateLeaf> sargLeaves = sarg.getLeaves();
     int[] filterColumns = RecordReaderImpl.mapSargColumnsToOrcInternalColIdx(sargLeaves,
         sargColNames, getRootColumn(isOriginal));
-    return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, filePath);
+    return pickStripesInternal(sarg, filterColumns, stripeStats, stripeCount, filePath, evolution);
   }
 
   private static boolean[] pickStripesInternal(SearchArgument sarg, int[] filterColumns,
-      List<StripeStatistics> stripeStats, int stripeCount, Path filePath) {
+      List<StripeStatistics> stripeStats, int stripeCount, Path filePath,
+      final SchemaEvolution evolution) {
     boolean[] includeStripe = new boolean[stripeCount];
     for (int i = 0; i < includeStripe.length; ++i) {
       includeStripe[i] = (i >= stripeStats.size()) ||
-          isStripeSatisfyPredicate(stripeStats.get(i), sarg, filterColumns);
+          isStripeSatisfyPredicate(stripeStats.get(i), sarg, filterColumns, evolution);
       if (isDebugEnabled && !includeStripe[i]) {
         LOG.debug("Eliminating ORC stripe-" + i + " of file '" + filePath
             + "' as it did not satisfy predicate condition.");
@@ -1932,15 +1924,19 @@ static Path findOriginalBucket(FileSystem fs,
   }
 
   private static boolean isStripeSatisfyPredicate(
-      StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns) {
+      StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns,
+      final SchemaEvolution evolution) {
     List<PredicateLeaf> predLeaves = sarg.getLeaves();
     TruthValue[] truthValues = new TruthValue[predLeaves.size()];
     for (int pred = 0; pred < truthValues.length; pred++) {
       if (filterColumns[pred] != -1) {
-
-        // column statistics at index 0 contains only the number of rows
-        ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
-        truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null);
+        if (evolution != null && !evolution.isPPDSafeConversion(filterColumns[pred])) {
+          truthValues[pred] = TruthValue.YES_NO_NULL;
+        } else {
+          // column statistics at index 0 contains only the number of rows
+          ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
+          truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null);
+        }
       } else {
 
         // parition column case.
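Reviewer note: a distilled sketch (not the patch's literal code) of the gating rule now
applied at both stripe level (isStripeSatisfyPredicate) and row-group level (SargApplier):
an unsafe file-to-reader conversion downgrades the predicate to "inconclusive", so the
stripe or row group is kept rather than wrongly eliminated.

    static TruthValue evaluateLeaf(int columnIx, ColumnStatistics stats,
        PredicateLeaf leaf, SchemaEvolution evolution) {
      if (evolution != null && !evolution.isPPDSafeConversion(columnIx)) {
        return TruthValue.YES_NO_NULL;   // cannot trust stats across this conversion
      }
      return RecordReaderImpl.evaluatePredicate(stats, leaf, null);
    }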
diff --git a/ql/src/test/queries/clientpositive/orc_ppd_schema_evol_3a.q b/ql/src/test/queries/clientpositive/orc_ppd_schema_evol_3a.q
new file mode 100644
index 0000000..88a94eb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/orc_ppd_schema_evol_3a.q
@@ -0,0 +1,245 @@
+set hive.mapred.mode=nonstrict;
+SET hive.fetch.task.conversion=none;
+SET hive.cbo.enable=false;
+
+CREATE TABLE staging(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging;
+LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging;
+
+CREATE TABLE orc_ppd_staging(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           c char(50),
+           v varchar(50),
+           da date,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*");
+
+insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s;
+
+-- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values
+-- which makes it hard to test bloom filters
+insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1;
+insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1;
+
+CREATE TABLE orc_ppd(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           c char(50),
+           v varchar(50),
+           da date,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*");
+
+insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s;
+
+SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter;
+SET hive.optimize.index.filter=false;
+
+-- Row group statistics for column t:
+-- Entry 0: count: 994 hasNull: true min: -10 max: 54 sum: 26014 positions: 0,0,0,0,0,0,0
+-- Entry 1: count: 1000 hasNull: false min: 54 max: 118 sum: 86812 positions: 0,2,124,0,0,116,11
+-- Entry 2: count: 100 hasNull: false min: 118 max: 127 sum: 12151 positions: 0,4,119,0,0,244,19
+
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54;
+
+alter table orc_ppd change column t t smallint;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54;
+
+alter table orc_ppd change column t t int;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54;
+
+alter table orc_ppd change column t t bigint;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > 127;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = 55;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54;
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = 54;
+
+alter table orc_ppd change column t t string;
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > '127';
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 0 (no row groups)
+select count(*) from orc_ppd where t > '127';
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = '55';
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 1000 (1 row group)
+select count(*) from orc_ppd where t = '55';
+
+SET hive.optimize.index.filter=false;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = '54';
+SET hive.optimize.index.filter=true;
+-- INPUT_RECORDS: 2000 (2 row groups)
+select count(*) from orc_ppd where t = '54';
+
+SET hive.optimize.index.filter=false;
+-- float tests
+select count(*) from orc_ppd where f = 74.72;
+SET hive.optimize.index.filter=true;
+select count(*) from orc_ppd where f = 74.72;
+
+alter table orc_ppd change column f f double;
+
+SET hive.optimize.index.filter=false;
+select count(*) from orc_ppd where f = 74.72;
+SET hive.optimize.index.filter=true;
+select count(*) from orc_ppd where f = 74.72;
+
+alter table orc_ppd change column f f string;
+
+SET hive.optimize.index.filter=false;
+select count(*) from orc_ppd where f = '74.72';
+SET hive.optimize.index.filter=true;
+select count(*) from orc_ppd where f = '74.72';
+
+SET hive.optimize.index.filter=false;
+-- string tests
+select count(*) from orc_ppd where s = 'bob davidson';
+SET hive.optimize.index.filter=true;
+select count(*) from orc_ppd where s = 'bob davidson';
+
+alter table orc_ppd change column s s char(50);
+
+SET hive.optimize.index.filter=false;
+select count(*) from orc_ppd where s = 'bob davidson';
+SET hive.optimize.index.filter=true;
+select count(*) from orc_ppd where s = 'bob davidson';
+
+alter table orc_ppd change column s s varchar(50);
+
+SET hive.optimize.index.filter=false;
+select count(*) from orc_ppd where s = 'bob davidson';
+SET hive.optimize.index.filter=true;
+select count(*) from orc_ppd where s = 'bob davidson';
+
+alter table orc_ppd change column s s char(50);
+
+SET hive.optimize.index.filter=false;
+select count(*) from orc_ppd where s = 'bob davidson';
+SET hive.optimize.index.filter=true;
+select count(*) from orc_ppd where s = 'bob davidson';
+
+alter table orc_ppd change column s s string;
+
+SET hive.optimize.index.filter=false;
+select count(*) from orc_ppd where s = 'bob davidson';
+SET hive.optimize.index.filter=true;
+select count(*) from orc_ppd where s = 'bob davidson';
+
+alter table orc_ppd add columns (boo boolean);
+
+SET hive.optimize.index.filter=false;
+-- ppd on newly added column
+select count(*) from orc_ppd where si = 442;
+select count(*) from orc_ppd where si = 442 or boo is not null or boo = false;
+SET hive.optimize.index.filter=true;
+select count(*) from orc_ppd where si = 442;
+select count(*) from orc_ppd where si = 442 or boo is not null or boo = false;
orc_ppd_staging.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +PREHOOK: query: -- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values +-- which makes it hard to test bloom filters +insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: -- just to introduce a gap in min/max range for bloom filters. 
The dataset has contiguous values +-- which makes it hard to test bloom filters +insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@orc_ppd_staging +POSTHOOK: Lineage: orc_ppd_staging.b EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.d EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.dec EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.i EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [] +PREHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@orc_ppd_staging +POSTHOOK: Lineage: orc_ppd_staging.b SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.d EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.dec SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [] +PREHOOK: query: CREATE TABLE orc_ppd(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_ppd +POSTHOOK: query: CREATE TABLE orc_ppd(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_ppd +PREHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd_staging +PREHOOK: Output: default@orc_ppd +POSTHOOK: query: insert overwrite table orc_ppd 
select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_ppd_staging +POSTHOOK: Output: default@orc_ppd +POSTHOOK: Lineage: orc_ppd.b SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.bin SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: orc_ppd.bo SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_ppd.c EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd.d SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: orc_ppd.da EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd.dec SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: orc_ppd.f SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: orc_ppd.i SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_ppd.s SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd.si SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.t SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.ts SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd.v EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] +PREHOOK: query: -- Row group statistics for column t: +-- Entry 0: count: 994 hasNull: true min: -10 max: 54 sum: 26014 positions: 0,0,0,0,0,0,0 +-- Entry 1: count: 1000 hasNull: false min: 54 max: 118 sum: 86812 positions: 0,2,124,0,0,116,11 +-- Entry 2: count: 100 hasNull: false min: 118 max: 127 sum: 12151 positions: 0,4,119,0,0,244,19 + +-- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +0 +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +0 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +8 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +8 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +18 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +18 +PREHOOK: query: alter table orc_ppd change column t t smallint +PREHOOK: type: 
ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +0 +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +0 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +8 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +8 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +18 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +18 +PREHOOK: query: alter table orc_ppd change column t t int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +0 +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +0 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +8 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +8 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +18 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +18 +PREHOOK: query: alter table orc_ppd change column t t bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +0 +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +0 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +8 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +8 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) 
+select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +18 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +18 +PREHOOK: query: alter table orc_ppd change column t t string +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > '127' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +1566 +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > '127' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +1566 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = '55' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +8 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = '55' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +8 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = '54' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +18 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = '54' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +18 +PREHOOK: query: -- float tests +select count(*) from orc_ppd where f = 74.72 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +2 +PREHOOK: query: select count(*) from orc_ppd where f = 74.72 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +2 +PREHOOK: query: alter table orc_ppd change column f f double +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select count(*) from orc_ppd where f = 74.72 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +2 +PREHOOK: query: select count(*) from orc_ppd where f = 74.72 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +2 +PREHOOK: query: alter table orc_ppd change column f f string +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select count(*) from orc_ppd where f = '74.72' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +2 +PREHOOK: query: select count(*) from orc_ppd where f = '74.72' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +2 +PREHOOK: query: -- string tests +select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +6 +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +6 +PREHOOK: query: alter table orc_ppd change column s s char(50) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' 
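The plain results above only show final row counts, but the tez variant of this golden file (added below) also prints file system and Hive counters, and those pin down the rule being tested: every integer widening of t (tinyint to smallint, int, bigint) keeps the filtered scans at 1000 or 2000 input records, while retyping t to string, or f to double or string, makes the same queries read all 2100 rows. A hypothetical restatement of that matrix -- an illustration of the observed behavior, not the patch's actual SchemaEvolution code:

    // Illustration only: the conversion matrix the golden counters imply.
    // Widening within the integer family keeps row-group statistics and
    // bloom filters usable for PPD; every other retype exercised by this
    // test falls back to reading all row groups.
    public class PpdSafeConversionSketch {
      enum Kind { BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING }

      static boolean ppdSafe(Kind fileType, Kind readerType) {
        if (fileType == readerType) {
          return true;                                  // no conversion
        }
        switch (fileType) {
          case BYTE:  return readerType == Kind.SHORT
                          || readerType == Kind.INT
                          || readerType == Kind.LONG;   // tinyint widened
          case SHORT: return readerType == Kind.INT
                          || readerType == Kind.LONG;
          case INT:   return readerType == Kind.LONG;
          default:    return false;  // float->double, anything->string
        }
      }

      public static void main(String[] args) {
        System.out.println(ppdSafe(Kind.BYTE, Kind.LONG));    // true
        System.out.println(ppdSafe(Kind.FLOAT, Kind.DOUBLE)); // false
        System.out.println(ppdSafe(Kind.BYTE, Kind.STRING));  // false
      }
    }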
+PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +6 +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +6 +PREHOOK: query: alter table orc_ppd change column s s varchar(50) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +6 +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +6 +PREHOOK: query: alter table orc_ppd change column s s char(50) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +6 +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +6 +PREHOOK: query: alter table orc_ppd change column s s string +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +6 +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +6 +PREHOOK: query: alter table orc_ppd add columns (boo boolean) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: -- ppd on newly added column +select count(*) from orc_ppd where si = 442 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +4 +PREHOOK: query: select count(*) from orc_ppd where si = 442 or boo is not null or boo = false +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +4 +PREHOOK: query: select count(*) from orc_ppd where si = 442 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +4 +PREHOOK: query: select count(*) from orc_ppd where si = 442 or boo is not null or boo = false +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +4 diff --git a/ql/src/test/results/clientpositive/tez/orc_ppd_schema_evol_3a.q.out b/ql/src/test/results/clientpositive/tez/orc_ppd_schema_evol_3a.q.out new file mode 100644 index 0000000..20d7085 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/orc_ppd_schema_evol_3a.q.out @@ -0,0 +1,1132 @@ +PREHOOK: query: CREATE TABLE staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@staging +POSTHOOK: query: CREATE TABLE staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE 
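Both ORC tables in this test are created with orc.row.index.stride = 1000 and bloom filters on every column, and they end up holding 2100 rows, so the data splits into three row groups of 1000, 1000 and 100 rows. That is the unit all of the INPUT_RECORDS comments below count in; a quick check of the arithmetic, assuming only those two figures from the output:

    // Row-group arithmetic behind the INPUT_RECORDS annotations, assuming
    // the 1000-row index stride from tblproperties and the 2100-row total
    // visible in the counters.
    public class RowGroupArithmetic {
      public static void main(String[] args) {
        int stride = 1000;
        int totalRows = 2100;
        int groups = (totalRows + stride - 1) / stride;    // 3 row groups
        int tailRows = totalRows - (groups - 1) * stride;  // 100-row tail
        System.out.println(groups + " row groups, tail has " + tailRows);
        // PPD keeps or drops whole row groups, so filtered scans can only
        // read sums of {1000, 1000, 100}: 1000, 2000 and 2100 are exactly
        // the RECORDS_IN_Map_1 values that appear in this file.
      }
    }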
+POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' INTO TABLE staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@staging +PREHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: CREATE TABLE orc_ppd_staging(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_ppd_staging +PREHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: insert overwrite table orc_ppd_staging select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from staging order by t, s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@orc_ppd_staging +POSTHOOK: Lineage: orc_ppd_staging.b SIMPLE [(staging)staging.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.bin SIMPLE [(staging)staging.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [(staging)staging.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.d SIMPLE [(staging)staging.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.dec SIMPLE [(staging)staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.f SIMPLE [(staging)staging.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [(staging)staging.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.si SIMPLE [(staging)staging.FieldSchema(name:si, type:smallint, 
comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.t SIMPLE [(staging)staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.ts SIMPLE [(staging)staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [(staging)staging.FieldSchema(name:s, type:string, comment:null), ] +PREHOOK: query: -- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values +-- which makes it hard to test bloom filters +insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: -- just to introduce a gap in min/max range for bloom filters. The dataset has contiguous values +-- which makes it hard to test bloom filters +insert into orc_ppd_staging select -10,-321,-65680,-4294967430,-97.94,-13.07,true,"aaa","aaa","aaa","1990-03-11","1990-03-11 10:11:58.703308",-71.54,"aaa" from staging limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@orc_ppd_staging +POSTHOOK: Lineage: orc_ppd_staging.b EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.d EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.dec EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.i EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [] +PREHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@staging +PREHOOK: Output: default@orc_ppd_staging +POSTHOOK: query: insert into orc_ppd_staging select 127,331,65690,4294967440,107.94,23.07,true,"zzz","zzz","zzz","2023-03-11","2023-03-11 10:11:58.703308",71.54,"zzz" from staging limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@staging +POSTHOOK: Output: default@orc_ppd_staging +POSTHOOK: Lineage: orc_ppd_staging.b SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.bin EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.bo SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.c EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.d EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.da EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.dec SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.f EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.i SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.s SIMPLE [] +POSTHOOK: Lineage: orc_ppd_staging.si EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.t EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.ts EXPRESSION [] +POSTHOOK: Lineage: orc_ppd_staging.v EXPRESSION [] +PREHOOK: query: CREATE TABLE orc_ppd(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", 
"orc.bloom.filter.columns"="*") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_ppd +POSTHOOK: query: CREATE TABLE orc_ppd(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + c char(50), + v varchar(50), + da date, + ts timestamp, + dec decimal(4,2), + bin binary) +STORED AS ORC tblproperties("orc.row.index.stride" = "1000", "orc.bloom.filter.columns"="*") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_ppd +PREHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd_staging +PREHOOK: Output: default@orc_ppd +POSTHOOK: query: insert overwrite table orc_ppd select t, si, i, b, f, d, bo, s, cast(s as char(50)), cast(s as varchar(50)), cast(ts as date), ts, dec, bin from orc_ppd_staging order by t, s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_ppd_staging +POSTHOOK: Output: default@orc_ppd +POSTHOOK: Lineage: orc_ppd.b SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.bin SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: orc_ppd.bo SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_ppd.c EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd.d SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: orc_ppd.da EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd.dec SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:dec, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: orc_ppd.f SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: orc_ppd.i SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_ppd.s SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: orc_ppd.si SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.t SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: orc_ppd.ts SIMPLE [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_ppd.v EXPRESSION [(orc_ppd_staging)orc_ppd_staging.FieldSchema(name:s, type:string, comment:null), ] +PREHOOK: query: -- Row group statistics for column t: +-- Entry 0: count: 994 hasNull: true min: -10 max: 54 sum: 26014 positions: 0,0,0,0,0,0,0 +-- Entry 1: count: 1000 hasNull: false min: 54 max: 118 sum: 86812 positions: 0,2,124,0,0,116,11 +-- Entry 2: count: 100 hasNull: false min: 118 max: 127 sum: 12151 positions: 0,4,119,0,0,244,19 + +-- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 16936 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + 
CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + RECORDS_OUT_0: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 16936 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 17909 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 16936 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 17909 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: alter table orc_ppd change column t t smallint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 16936 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + RECORDS_OUT_0: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 1000 
(1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 16936 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 17909 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 16936 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 17909 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: alter table orc_ppd change column t t int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 16936 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + RECORDS_OUT_0: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 16936 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY 
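The group counts for these three predicates follow directly from the row-group statistics quoted in the header comment: t > 127 overlaps no group, t = 55 only the middle group, and t = 54 the first two. A self-contained sketch of that min/max test (simplified from the real SARG evaluation, which also consults null counts and bloom filters):

    // Min/max pruning against the statistics quoted above:
    //   group 0: [-10, 54], group 1: [54, 118], group 2: [118, 127]
    public class MinMaxPruning {
      static final long[][] GROUPS = {{-10, 54}, {54, 118}, {118, 127}};

      static int groupsWhereEquals(long v) {
        int kept = 0;
        for (long[] g : GROUPS) {
          if (v >= g[0] && v <= g[1]) kept++;   // v may occur in [min, max]
        }
        return kept;
      }

      static int groupsWhereGreaterThan(long v) {
        int kept = 0;
        for (long[] g : GROUPS) {
          if (g[1] > v) kept++;                 // some value may exceed v
        }
        return kept;
      }

      public static void main(String[] args) {
        System.out.println(groupsWhereGreaterThan(127)); // 0 -> no row groups
        System.out.println(groupsWhereEquals(55));       // 1 -> 1000 records
        System.out.println(groupsWhereEquals(54));       // 2 -> 2000 records
      }
    }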
+PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 17909 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 16936 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 17909 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: alter table orc_ppd change column t t bigint +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 16936 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 0 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 2 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + RECORDS_OUT_0: 1 +0 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 16936 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = 55 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 17909 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### 
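The counters above confirm that each widening of t leaves those three group counts untouched. A minimal sketch of how a reader can ask the new API the same question -- assuming the three-argument SchemaEvolution constructor (file schema, reader schema, included flags) and ORC's column numbering, where the struct root is column 0:

    import org.apache.orc.TypeDescription;
    import org.apache.orc.impl.SchemaEvolution;

    public class PpdSafetyCheck {
      public static void main(String[] args) {
        // The file was written while t was a tinyint ...
        TypeDescription file = TypeDescription.createStruct()
            .addField("t", TypeDescription.createByte());
        // ... and is now read as bigint after the ALTER TABLE above.
        TypeDescription reader = TypeDescription.createStruct()
            .addField("t", TypeDescription.createLong());
        boolean[] included = new boolean[] {true, true};  // root + t
        SchemaEvolution evolution =
            new SchemaEvolution(file, reader, included);
        // Column 1 is t; expected: true for an integer widening, so the
        // file's stats and bloom filters stay usable for PPD.
        System.out.println(evolution.isPPDSafeConversion(1));
      }
    }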
+Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 16936 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = 54 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 17909 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: alter table orc_ppd change column t t string +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > '127' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 16936 + HDFS_BYTES_WRITTEN: 104 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +1566 +PREHOOK: query: -- INPUT_RECORDS: 0 (no row groups) +select count(*) from orc_ppd where t > '127' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 17909 + HDFS_BYTES_WRITTEN: 104 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +1566 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = '55' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 16936 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 1000 (1 row group) +select count(*) from orc_ppd where t = '55' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 17909 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +8 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = '54' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 16936 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: -- INPUT_RECORDS: 2000 (2 row groups) +select count(*) from orc_ppd where t = '54' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### 
A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 17909 + HDFS_BYTES_WRITTEN: 102 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +18 +PREHOOK: query: -- float tests +select count(*) from orc_ppd where f = 74.72 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 21496 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +2 +PREHOOK: query: select count(*) from orc_ppd where f = 74.72 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 23556 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +2 +PREHOOK: query: alter table orc_ppd change column f f double +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select count(*) from orc_ppd where f = 74.72 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 21496 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +2 +PREHOOK: query: select count(*) from orc_ppd where f = 74.72 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 23556 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +2 +PREHOOK: query: alter table orc_ppd change column f f string +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select count(*) from orc_ppd where f = '74.72' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 21496 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +2 +PREHOOK: query: select count(*) from orc_ppd where f = '74.72' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 23556 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +2 +PREHOOK: query: -- string tests +select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here 
#### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 20667 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 22574 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: alter table orc_ppd change column s s char(50) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 20667 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 22574 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: alter table orc_ppd change column s s varchar(50) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 20667 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 22574 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: alter table orc_ppd change column s s char(50) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 20667 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + 
RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 22574 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: alter table orc_ppd change column s s string +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 20667 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: select count(*) from orc_ppd where s = 'bob davidson' +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 22574 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +6 +PREHOOK: query: alter table orc_ppd add columns (boo boolean) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@orc_ppd +PREHOOK: Output: default@orc_ppd +PREHOOK: query: -- ppd on newly added column +select count(*) from orc_ppd where si = 442 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 18785 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +4 +PREHOOK: query: select count(*) from orc_ppd where si = 442 or boo is not null or boo = false +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 18785 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +4 +PREHOOK: query: select count(*) from orc_ppd where si = 442 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 20256 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 0 + RECORDS_IN_Map_1: 1000 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +4 +PREHOOK: query: select count(*) from orc_ppd where si = 442 or boo is not null or boo = false +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_ppd +#### A masked pattern was here #### +Stage-1 FILE SYSTEM COUNTERS: + HDFS_BYTES_READ: 20256 + HDFS_BYTES_WRITTEN: 101 + HDFS_READ_OPS: 4 + HDFS_LARGE_READ_OPS: 0 + HDFS_WRITE_OPS: 2 +Stage-1 HIVE COUNTERS: + CREATED_FILES: 1 + DESERIALIZE_ERRORS: 
0 + RECORDS_IN_Map_1: 2100 + RECORDS_OUT_0: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 1 +4
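The closing pair of queries marks the limit of pruning on a freshly added column: si = 442 alone still narrows the scan to one row group (1000 records), but OR-ing in predicates on boo, a column with no data in the file, forces all 2100 rows to be read. SARG evaluation over row-group statistics is three-valued, and a leaf over a column absent from the file can only answer 'maybe'; a simplified sketch (the real org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue lattice also tracks null-bearing variants):

    // Simplified three-valued OR used when deciding whether a row group
    // can be skipped; only a definite NO allows skipping.
    public class SargOrSketch {
      enum Truth { YES, NO, MAYBE }

      static Truth or(Truth a, Truth b) {
        if (a == Truth.YES || b == Truth.YES) return Truth.YES;
        if (a == Truth.NO && b == Truth.NO) return Truth.NO;
        return Truth.MAYBE;
      }

      public static void main(String[] args) {
        Truth siLeaf = Truth.NO;      // stats prove si != 442 in this group
        Truth booLeaf = Truth.MAYBE;  // boo does not exist in the file
        System.out.println(or(siLeaf, booLeaf));  // MAYBE -> group is read
      }
    }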