Uploaded image for project: 'Hive'
  1. Hive
  2. HIVE-20563

Vectorization: CASE WHEN expression fails when THEN/ELSE type and result type are different

    XMLWordPrintableJSON

Details

    • Bug
    • Status: Closed
    • Major
    • Resolution: Fixed
    • 4.0.0
    • 4.0.0-alpha-1
    • None
    • None

    Description

      With the following stacktrace:

      java.lang.Exception: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row
              at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:492) ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
              at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:552) [hadoop-mapreduce-client-common-3.1.0.jar:?]
      Caused by: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row
              at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:163) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54) ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
              at org.apache.hadoop.hive.ql.exec.mr.ExecMapRunner.run(ExecMapRunner.java:37) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:465) ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
              at org.apache.hadoop.mapred.MapTask.run(MapTask.java:349) ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
              at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:271) ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
              at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_181]
              at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_181]
              at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_181]
              at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_181]
              at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_181]
      Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row
              at org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.process(VectorMapOperator.java:973) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:154) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54) ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
              at org.apache.hadoop.hive.ql.exec.mr.ExecMapRunner.run(ExecMapRunner.java:37) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:465) ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
              at org.apache.hadoop.mapred.MapTask.run(MapTask.java:349) ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
              at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:271) ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
              at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_181]
              at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_181]
              at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_181]
              at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_181]
              at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_181]
      Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Error evaluating cstring1
              at org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:149) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.Operator.vectorForward(Operator.java:965) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:938) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator.process(VectorFilterOperator.java:136) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.Operator.vectorForward(Operator.java:965) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:938) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:125) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.deliverVectorizedRowBatch(VectorMapOperator.java:812) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.process(VectorMapOperator.java:845) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:154) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54) ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
              at org.apache.hadoop.hive.ql.exec.mr.ExecMapRunner.run(ExecMapRunner.java:37) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:465) ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
              at org.apache.hadoop.mapred.MapTask.run(MapTask.java:349) ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
              at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:271) ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
              at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_181]
              at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_181]
              at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_181]
              at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_181]
              at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_181]
      Caused by: java.lang.ClassCastException: org.apache.hadoop.io.FloatWritable cannot be cast to org.apache.hadoop.io.Text
              at org.apache.hadoop.hive.ql.exec.vector.VectorAssignRow.assignRowColumn(VectorAssignRow.java:471) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.vector.VectorAssignRow.assignRowColumn(VectorAssignRow.java:350) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor.setResult(VectorUDFAdaptor.java:205) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor.evaluate(VectorUDFAdaptor.java:146) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprCondExprBase.conditionalEvaluate(IfExprCondExprBase.java:68) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprColumnCondExpr.evaluate(IfExprColumnCondExpr.java:113) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:146) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.Operator.vectorForward(Operator.java:965) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:938) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator.process(VectorFilterOperator.java:136) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.Operator.vectorForward(Operator.java:965) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:938) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:125) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.deliverVectorizedRowBatch(VectorMapOperator.java:812) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.process(VectorMapOperator.java:845) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:154) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54) ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
              at org.apache.hadoop.hive.ql.exec.mr.ExecMapRunner.run(ExecMapRunner.java:37) ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
              at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:465) ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
              at org.apache.hadoop.mapred.MapTask.run(MapTask.java:349) ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
              at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:271) ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
              at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_181]
      ...
      

      To repro:

      --! qt:dataset:alltypesorc
      set hive.stats.fetch.column.stats=true;
      set hive.explain.user=false;
      SET hive.vectorized.execution.enabled=true;
      set hive.fetch.task.conversion=none;
      
      -- SORT_QUERY_RESULTS
      
      EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, csmallint,
        case
          when (cdouble is not null) then cdouble
          when (cstring1 is not null) then cstring1
          when (cint is not null) then cint
          when (cfloat is not null) then cfloat
          when (csmallint is not null) then csmallint
          else null
          end as c
      FROM alltypesorc
      WHERE (cdouble IS NULL)
      ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c
      LIMIT 10;
      
      SELECT cdouble, cstring1, cint, cfloat, csmallint,
        case
          when (cdouble is not null) then cdouble
          when (cstring1 is not null) then cstring1
          when (cint is not null) then cint
          when (cfloat is not null) then cfloat
          when (csmallint is not null) then csmallint
          else null
          end as c
      FROM alltypesorc
      WHERE (cdouble IS NULL)
      ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c
      LIMIT 10;
      

      Attachments

        1. HIVE-20563.01.patch
          10 kB
          Matt McCline
        2. HIVE-20563.02.patch
          29 kB
          Matt McCline
        3. HIVE-20563.03.patch
          282 kB
          Matt McCline
        4. HIVE-20563.04.patch
          286 kB
          Matt McCline
        5. HIVE-20563.05.patch
          294 kB
          Matt McCline
        6. HIVE-20563.06.patch
          294 kB
          Matt McCline
        7. HIVE-20563.07.patch
          294 kB
          Matt McCline
        8. HIVE-20563.08.patch
          303 kB
          Matt McCline
        9. HIVE-20563.09.patch
          201 kB
          Matt McCline

        Issue Links

          Activity

            People

              mmccline Matt McCline
              jcamacho Jesús Camacho Rodríguez
              Votes:
              0 Vote for this issue
              Watchers:
              1 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: