diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index 2607d9c..1f262d0 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -424,8 +424,7 @@ private void addSplitsForGroup(List<Path> dirs, TableScanOperator tableScan, Job
       TableDesc table = part.getTableDesc();
       TableScanOperator tableScan = null;
 
-      List<String> aliases =
-          mrwork.getPathToAliases().get(dir.toUri().toString());
+      List<String> aliases = mrwork.getPathToAliases().get(dir.toString());
 
       // Make filter pushdown information available to getSplits.
       if ((aliases != null) && (aliases.size() == 1)) {
@@ -442,6 +441,10 @@ private void addSplitsForGroup(List<Path> dirs, TableScanOperator tableScan, Job
           // push down filters
           pushFilters(newjob, tableScan);
         }
+      } else {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("aliases: {} pathToAliases: {} dir: {}", aliases, mrwork.getPathToAliases(), dir);
+        }
       }
 
       if (!currentDirs.isEmpty() &&
@@ -453,7 +456,15 @@ private void addSplitsForGroup(List<Path> dirs, TableScanOperator tableScan, Job
       }
 
       if (!currentDirs.isEmpty()) {
-        LOG.info("Generating splits");
+        if (LOG.isInfoEnabled()) {
+          LOG.info("Generating splits as currentDirs is not empty. currentDirs: {}", currentDirs);
+        }
+
+        // set columns to read in conf
+        if (pushDownProjection) {
+          pushProjection(newjob, readColumnsBuffer, readColumnNamesBuffer);
+        }
+
         addSplitsForGroup(currentDirs, currentTableScan, newjob,
             getInputFormatFromCache(currentInputFormatClass, job),
             currentInputFormatClass, currentDirs.size()*(numSplits / dirs.length),
@@ -466,16 +477,16 @@ private void addSplitsForGroup(List<Path> dirs, TableScanOperator tableScan, Job
       currentTable = table;
       currentInputFormatClass = inputFormatClass;
     }
+
+    // set columns to read in conf
     if (pushDownProjection) {
-      newjob.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
-      newjob.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, readColumnsBuffer.toString());
-      newjob.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, readColumnNamesBuffer.toString());
-      LOG.info(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR + "=" + readColumnsBuffer.toString());
-      LOG.info(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR + "=" + readColumnNamesBuffer.toString());
+      pushProjection(newjob, readColumnsBuffer, readColumnNamesBuffer);
     }
 
     if (dirs.length != 0) {
-      LOG.info("Generating splits");
+      if (LOG.isInfoEnabled()) {
+        LOG.info("Generating splits for dirs: {}", dirs);
+      }
       addSplitsForGroup(currentDirs, currentTableScan, newjob,
           getInputFormatFromCache(currentInputFormatClass, job),
           currentInputFormatClass, currentDirs.size()*(numSplits / dirs.length),
@@ -483,11 +494,29 @@ private void addSplitsForGroup(List<Path> dirs, TableScanOperator tableScan, Job
     }
 
     Utilities.clearWorkMapForConf(job);
-    LOG.info("number of splits " + result.size());
+    if (LOG.isInfoEnabled()) {
+      LOG.info("number of splits " + result.size());
+    }
     perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
     return result.toArray(new HiveInputSplit[result.size()]);
   }
 
+  private void pushProjection(final JobConf newjob, final StringBuilder readColumnsBuffer,
+      final StringBuilder readColumnNamesBuffer) {
+    String readColIds = readColumnsBuffer.toString();
+    String readColNames = readColumnNamesBuffer.toString();
+    boolean readAllColumns = readColIds.isEmpty() ? true : false;
+    newjob.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, readAllColumns);
+    newjob.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, readColIds);
+    newjob.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, readColNames);
+
+    if (LOG.isInfoEnabled()) {
+      LOG.info("{} = {}", ColumnProjectionUtils.READ_ALL_COLUMNS, readAllColumns);
+      LOG.info("{} = {}", ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, readColIds);
+      LOG.info("{} = {}", ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, readColNames);
+    }
+  }
+
   protected static PartitionDesc getPartitionDescFromPath(
       Map<String, PartitionDesc> pathToPartitionInfo, Path dir)
       throws IOException {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index b3b48d5..359cbf7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -414,7 +414,11 @@ static boolean canCreateSargFromConf(Configuration conf) {
 
   private static String[] extractNeededColNames(
       List<OrcProto.Type> types, Configuration conf, boolean[] include, boolean isOriginal) {
-    return extractNeededColNames(types, getNeededColumnNamesString(conf), include, isOriginal);
+    String colNames = getNeededColumnNamesString(conf);
+    if (colNames == null) {
+      return null;
+    }
+    return extractNeededColNames(types, colNames, include, isOriginal);
   }
 
   private static String[] extractNeededColNames(
@@ -1068,10 +1072,13 @@ OrcSplit createSplit(long offset, long length,
       // we can't eliminate stripes if there are deltas because the
       // deltas may change the rows making them match the predicate.
       if ((deltas == null || deltas.isEmpty()) && context.sarg != null) {
-        SearchArgument sarg = ConvertAstToSearchArg.createFromConf(context.conf);
         String[] colNames = extractNeededColNames(types, context.conf, includedCols, isOriginal);
-        includeStripe = pickStripes(context.sarg, colNames, writerVersion, isOriginal,
-            stripeStats, stripes.size(), file.getPath());
+        if (colNames == null) {
+          LOG.warn("Skipping split elimination for {} as column names is null", file.getPath());
+        } else {
+          includeStripe = pickStripes(context.sarg, colNames, writerVersion, isOriginal,
+              stripeStats, stripes.size(), file.getPath());
+        }
       }
 
       // if we didn't have predicate pushdown, read everything
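
Illustration (not part of the patch): a minimal sketch of the conf state that the extracted pushProjection() helper above produces, and how a reader-side consumer can interpret it through the same ColumnProjectionUtils keys. The driver class name and the column ids/names used here ("0,2", "id,name") are made up for the example.

import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.mapred.JobConf;

// Hypothetical driver, for illustration only.
public class PushProjectionSketch {
  public static void main(String[] args) {
    JobConf conf = new JobConf();

    // Equivalent of what pushProjection() writes when two columns were
    // collected into readColumnsBuffer / readColumnNamesBuffer.
    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2");
    conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "id,name");

    // When both buffers are empty, pushProjection() instead sets
    // READ_ALL_COLUMNS to true and leaves the id/name lists empty.

    // Reader side: input formats such as ORC consult the same keys to prune columns.
    System.out.println(ColumnProjectionUtils.isReadAllColumns(conf)); // false
    System.out.println(ColumnProjectionUtils.getReadColumnIDs(conf)); // [0, 2]
  }
}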