From bfd2c04f727ee7486d4a976a0c9ecfd8bc896879 Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Thu, 12 Nov 2015 22:18:16 -0800 Subject: [PATCH] HIVE-12404 : Orc ppd throws exception if types don't match --- .../test/queries/clientpositive/vectorization_0.q | 2 +- .../test/queries/clientpositive/vectorization_13.q | 4 +-- .../clientpositive/vectorization_short_regress.q | 2 +- .../clientpositive/tez/vectorization_0.q.out | 1 + .../clientpositive/tez/vectorization_13.q.out | 2 ++ .../tez/vectorization_short_regress.q.out | 12 +++++++ .../hadoop/hive/ql/io/sarg/SearchArgumentImpl.java | 40 +++++++++++++--------- 7 files changed, 43 insertions(+), 20 deletions(-) diff --git a/ql/src/test/queries/clientpositive/vectorization_0.q b/ql/src/test/queries/clientpositive/vectorization_0.q index d786f74..dd5e4b0 100644 --- a/ql/src/test/queries/clientpositive/vectorization_0.q +++ b/ql/src/test/queries/clientpositive/vectorization_0.q @@ -1,6 +1,6 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; - +set hive.optimize.index.filter=true; -- SORT_QUERY_RESULTS -- Use ORDER BY clauses to generate 2 stages. 
diff --git a/ql/src/test/queries/clientpositive/vectorization_13.q b/ql/src/test/queries/clientpositive/vectorization_13.q index 831530e..59414f0 100644 --- a/ql/src/test/queries/clientpositive/vectorization_13.q +++ b/ql/src/test/queries/clientpositive/vectorization_13.q @@ -1,7 +1,7 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=minimal; - +set hive.optimize.index.filter=true; -- SORT_QUERY_RESULTS EXPLAIN @@ -133,4 +133,4 @@ WHERE (((cfloat < 3569) AND (ctinyint < 9763215.5639)))) GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 -LIMIT 40; \ No newline at end of file +LIMIT 40; diff --git a/ql/src/test/queries/clientpositive/vectorization_short_regress.q b/ql/src/test/queries/clientpositive/vectorization_short_regress.q index 29cdf19..6df61a8 100644 --- a/ql/src/test/queries/clientpositive/vectorization_short_regress.q +++ b/ql/src/test/queries/clientpositive/vectorization_short_regress.q @@ -1,7 +1,7 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=minimal; - +set hive.optimize.index.filter=true; -- SORT_QUERY_RESULTS -- If you look at ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/OrcFileGenerator.java diff --git a/ql/src/test/results/clientpositive/tez/vectorization_0.q.out b/ql/src/test/results/clientpositive/tez/vectorization_0.q.out index 9c33ace..e37be5b 100644 --- a/ql/src/test/results/clientpositive/tez/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorization_0.q.out @@ -992,6 +992,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc + filterExpr: ((cstring2 like '%b%') or (79.553 <> UDFToDouble(cint)) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (3569.0 = UDFToDouble(ctinyint)))) (type: boolean) 
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((cstring2 like '%b%') or (79.553 <> UDFToDouble(cint)) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (3569.0 = UDFToDouble(ctinyint)))) (type: boolean) diff --git a/ql/src/test/results/clientpositive/tez/vectorization_13.q.out b/ql/src/test/results/clientpositive/tez/vectorization_13.q.out index ac33341..2c95cef 100644 --- a/ql/src/test/results/clientpositive/tez/vectorization_13.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorization_13.q.out @@ -84,6 +84,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc + filterExpr: (((cfloat < 3569.0) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > 11.0) and (UDFToDouble(ctimestamp2) <> 12.0) and (UDFToDouble(ctinyint) < 9763215.5639))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((cfloat < 3569.0) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > 11.0) and (UDFToDouble(ctimestamp2) <> 12.0) and (UDFToDouble(ctinyint) < 9763215.5639))) (type: boolean) @@ -337,6 +338,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc + filterExpr: (((cfloat < 3569.0) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -1.388) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999) and (UDFToDouble(ctinyint) < 9763215.5639))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((cfloat < 3569.0) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -1.388) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999) and (UDFToDouble(ctinyint) < 9763215.5639))) (type: boolean) diff --git a/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out 
b/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out index 5c2ece6..37e6964 100644 --- a/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out @@ -150,6 +150,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc + filterExpr: ((762 = cbigint) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (cstring1 = 'a') or ((UDFToDouble(cbigint) <= -1.389) and (cstring2 <> 'a') and (79.553 <> UDFToDouble(cint)) and (cboolean2 <> cboolean1))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((762 = cbigint) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (cstring1 = 'a') or ((UDFToDouble(cbigint) <= -1.389) and (cstring2 <> 'a') and (79.553 <> UDFToDouble(cint)) and (cboolean2 <> cboolean1))) (type: boolean) @@ -362,6 +363,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc + filterExpr: (((cbigint <= 197) and (UDFToLong(cint) < cbigint)) or ((cdouble >= -26.28) and (UDFToDouble(csmallint) > cdouble)) or ((UDFToFloat(ctinyint) > cfloat) and cstring1 regexp '.*ss.*') or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((cbigint <= 197) and (UDFToLong(cint) < cbigint)) or ((cdouble >= -26.28) and (UDFToDouble(csmallint) > cdouble)) or ((UDFToFloat(ctinyint) > cfloat) and cstring1 regexp '.*ss.*') or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean) @@ -565,6 +567,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc + filterExpr: ((ctimestamp1 = ctimestamp2) or (762.0 = cfloat) or (cstring1 = 'ss') or ((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) or (cboolean1 is not 
null and ctimestamp2 is not null and (cstring2 > 'a'))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((ctimestamp1 = ctimestamp2) or (762.0 = cfloat) or (cstring1 = 'ss') or ((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) or (cboolean1 is not null and ctimestamp2 is not null and (cstring2 > 'a'))) (type: boolean) @@ -747,6 +750,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc + filterExpr: (((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and ('ss' <= cstring1)) or ((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0)) or (cfloat = 17.0)) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and ('ss' <= cstring1)) or ((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0)) or (cfloat = 17.0)) (type: boolean) @@ -937,6 +941,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc + filterExpr: ((cstring1 regexp 'a.*' and (cstring2 like '%ss%')) or ((1 <> cboolean2) and (UDFToDouble(csmallint) < 79.553) and (-257 <> UDFToInteger(ctinyint))) or ((cdouble > UDFToDouble(ctinyint)) and (cfloat >= UDFToFloat(cint))) or ((UDFToLong(cint) < cbigint) and (UDFToLong(ctinyint) > cbigint))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((cstring1 regexp 'a.*' and (cstring2 like '%ss%')) or ((1 <> cboolean2) and (UDFToDouble(csmallint) < 79.553) and (-257 <> UDFToInteger(ctinyint))) or ((cdouble > UDFToDouble(ctinyint)) and (cfloat >= UDFToFloat(cint))) or ((UDFToLong(cint) < cbigint) and (UDFToLong(ctinyint) > cbigint))) (type: boolean) @@ -1195,6 +1200,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc + filterExpr: (((197.0 > UDFToDouble(ctinyint)) and 
(UDFToLong(cint) = cbigint)) or (cbigint = 359) or (cboolean1 < 0) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((197.0 > UDFToDouble(ctinyint)) and (UDFToLong(cint) = cbigint)) or (cbigint = 359) or (cboolean1 < 0) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))) (type: boolean) @@ -1402,6 +1408,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc + filterExpr: (((UDFToDouble(csmallint) > -26.28) and (cstring2 like 'ss')) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((UDFToFloat(cbigint) <= cfloat) and (-26.28 <= UDFToDouble(csmallint)))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((UDFToDouble(csmallint) > -26.28) and (cstring2 like 'ss')) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((UDFToFloat(cbigint) <= cfloat) and (-26.28 <= UDFToDouble(csmallint)))) (type: boolean) @@ -1668,6 +1675,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc + filterExpr: (((-1.389 >= UDFToDouble(cint)) and (csmallint < UDFToShort(ctinyint)) and (-6432 > UDFToInteger(csmallint))) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (10.175 > UDFToDouble(cbigint)))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((-1.389 >= UDFToDouble(cint)) and (csmallint < UDFToShort(ctinyint)) and (-6432 > UDFToInteger(csmallint))) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (10.175 > UDFToDouble(cbigint)))) (type: boolean) @@ -1876,6 +1884,7 @@ STAGE PLANS: Map Operator 
Tree: TableScan alias: alltypesorc + filterExpr: ((UDFToInteger(csmallint) >= -257) and ((-6432 = UDFToInteger(csmallint)) or ((UDFToDouble(cint) >= cdouble) and (UDFToInteger(ctinyint) <= cint)))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((UDFToInteger(csmallint) >= -257) and ((-6432 = UDFToInteger(csmallint)) or ((UDFToDouble(cint) >= cdouble) and (UDFToInteger(ctinyint) <= cint)))) (type: boolean) @@ -2083,6 +2092,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc + filterExpr: ((cdouble > 2563.58) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (UDFToDouble(cfloat) < -5638.15)) or (2563.58 = UDFToDouble(ctinyint)) or ((cdouble <= UDFToDouble(cbigint)) and (-5638.15 > UDFToDouble(cbigint))))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((cdouble > 2563.58) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (UDFToDouble(cfloat) < -5638.15)) or (2563.58 = UDFToDouble(ctinyint)) or ((cdouble <= UDFToDouble(cbigint)) and (-5638.15 > UDFToDouble(cbigint))))) (type: boolean) @@ -2338,6 +2348,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc + filterExpr: ((UDFToDouble(ctimestamp1) <> 0.0) and (((-257 <> UDFToInteger(ctinyint)) and cboolean2 is not null and cstring1 regexp '.*ss' and (-3.0 < UDFToDouble(ctimestamp1))) or (UDFToDouble(ctimestamp2) = -5.0) or ((UDFToDouble(ctimestamp1) < 0.0) and (cstring2 like '%b%')) or (cdouble = UDFToDouble(cint)) or (cboolean1 is null and (cfloat < UDFToFloat(cint))))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((UDFToDouble(ctimestamp1) <> 0.0) and (((-257 <> UDFToInteger(ctinyint)) and cboolean2 is not null and cstring1 regexp '.*ss' and (-3.0 < UDFToDouble(ctimestamp1))) or 
(UDFToDouble(ctimestamp2) = -5.0) or ((UDFToDouble(ctimestamp1) < 0.0) and (cstring2 like '%b%')) or (cdouble = UDFToDouble(cint)) or (cboolean1 is null and (cfloat < UDFToFloat(cint))))) (type: boolean) @@ -2670,6 +2681,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc + filterExpr: (cboolean1 is not null and (((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (UDFToDouble(cbigint) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null))) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (cboolean1 is not null and (((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (UDFToDouble(cbigint) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null))) (type: boolean) diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java index eeff131..e9afef3 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java @@ -283,7 +283,7 @@ private int addLeaf(PredicateLeaf leaf) { public Builder lessThan(String column, PredicateLeaf.Type type, Object literal) { ExpressionTree parent = currentTree.getFirst(); - if (column == null || literal == null) { + if (column == null || literal == null || literal.getClass() != type.getValueClass()) { parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); } else { PredicateLeaf leaf = @@ -298,7 +298,7 @@ public Builder lessThan(String column, PredicateLeaf.Type type, public Builder lessThanEquals(String column, PredicateLeaf.Type type, Object literal) { 
ExpressionTree parent = currentTree.getFirst(); - if (column == null || literal == null) { + if (column == null || literal == null || literal.getClass() != type.getValueClass()) { parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); } else { PredicateLeaf leaf = @@ -313,7 +313,7 @@ public Builder lessThanEquals(String column, PredicateLeaf.Type type, public Builder equals(String column, PredicateLeaf.Type type, Object literal) { ExpressionTree parent = currentTree.getFirst(); - if (column == null || literal == null) { + if (column == null || literal == null || literal.getClass() != type.getValueClass()) { parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); } else { PredicateLeaf leaf = @@ -328,7 +328,7 @@ public Builder equals(String column, PredicateLeaf.Type type, public Builder nullSafeEquals(String column, PredicateLeaf.Type type, Object literal) { ExpressionTree parent = currentTree.getFirst(); - if (column == null || literal == null) { + if (column == null || literal == null || literal.getClass() != type.getValueClass()) { parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); } else { PredicateLeaf leaf = @@ -345,19 +345,26 @@ public Builder in(String column, PredicateLeaf.Type type, ExpressionTree parent = currentTree.getFirst(); if (column == null || literal == null) { parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); - } else { - if (literal.length == 0) { - throw new IllegalArgumentException("Can't create in expression with " - + "no arguments"); + return this; + } + if (literal.length == 0) { + throw new IllegalArgumentException("Can't create in expression with " + + "no arguments"); + } + for (int i = 0; i < literal.length; i++) { /* inspect every literal, including the last one */ + if (literal[i] == null || literal[i].getClass() != type.getValueClass()) { /* null or mistyped literal: fall back to YES_NO_NULL instead of throwing */ + parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); + return this; } - List argList = new ArrayList(); - argList.addAll(Arrays.asList(literal)); - - PredicateLeaf leaf = - new
PredicateLeafImpl(PredicateLeaf.Operator.IN, - type, column, null, argList); - parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); } + List argList = new ArrayList(); + argList.addAll(Arrays.asList(literal)); + + PredicateLeaf leaf = + new PredicateLeafImpl(PredicateLeaf.Operator.IN, + type, column, null, argList); + parent.getChildren().add(new ExpressionTree(addLeaf(leaf))); + return this; } @@ -379,7 +386,8 @@ public Builder isNull(String column, PredicateLeaf.Type type) { public Builder between(String column, PredicateLeaf.Type type, Object lower, Object upper) { ExpressionTree parent = currentTree.getFirst(); - if (column == null || lower == null || upper == null) { + if (column == null || lower == null || upper == null || lower.getClass() != type.getValueClass() + || upper.getClass() != type.getValueClass()) { parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL)); } else { List argList = new ArrayList(); -- 1.7.12.4 (Apple Git-37)