diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 9491015..fce11c8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -215,6 +215,7 @@
   public static final String MAPRED_MAPPER_CLASS = "mapred.mapper.class";
   public static final String MAPRED_REDUCER_CLASS = "mapred.reducer.class";
   public static final String HIVE_ADDED_JARS = "hive.added.jars";
+  public static final String VECTOR_MODE = "VECTOR_MODE";
   public static String MAPNAME = "Map ";
   public static String REDUCENAME = "Reducer ";
 
@@ -3238,12 +3239,18 @@ private static void resetUmaskInConf(Configuration conf, boolean unsetUmask, Str
    * but vectorization disallowed eg. for FetchOperator execution.
    */
   public static boolean isVectorMode(Configuration conf) {
-    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) &&
-        Utilities.getPlanPath(conf) != null && Utilities
-        .getMapWork(conf).getVectorMode()) {
-      return true;
+    if (conf.get(VECTOR_MODE) != null) {
+      // this code path is necessary, because with HS2 and client
+      // side split generation we end up not finding the map work.
+      // This is because of thread local madness (tez split
+      // generation is multi-threaded - HS2 plan cache uses thread
+      // locals).
+      return conf.getBoolean(VECTOR_MODE, false);
+    } else {
+      return HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)
+        && Utilities.getPlanPath(conf) != null
+        && Utilities.getMapWork(conf).getVectorMode();
     }
-    return false;
   }
 
   /**
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
index 6e196e6..e8864ae 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
@@ -614,6 +614,15 @@ private Vertex createVertex(JobConf conf, MapWork mapWork,
       }
     } else {
       // Setup client side split generation.
+
+      // we need to set this, because with HS2 and client side split
+      // generation we end up not finding the map work. This is
+      // because of thread local madness (tez split generation is
+      // multi-threaded - HS2 plan cache uses thread locals). Setting
+      // VECTOR_MODE causes the split gen code to use the conf instead
+      // of the map work.
+      conf.setBoolean(Utilities.VECTOR_MODE, mapWork.getVectorMode());
+
       dataSource = MRInputHelpers.configureMRInputWithLegacySplitGeneration(conf, new Path(tezDir,
           "split_" + mapWork.getName().replaceAll(" ", "_")), true);
       numTasks = dataSource.getNumberOfShards();