diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index 6b46188a0f..ffd47a2a34 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -43,6 +43,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; @@ -523,6 +524,7 @@ public Long call() throws Exception { // scanning the filesystem to get file lengths. private Status checkThresholdWithMetastoreStats(final Table table, final PrunedPartitionList partsList, final long threshold) { + Status status = Status.UNAVAILABLE; if (table != null && !table.isPartitioned()) { long dataSize = StatsUtils.getTotalSize(table); if (dataSize <= 0) { @@ -530,7 +532,7 @@ private Status checkThresholdWithMetastoreStats(final Table table, final PrunedP return Status.UNAVAILABLE; } - return (threshold - dataSize) >= 0 ? Status.PASS : Status.FAIL; + status = (threshold - dataSize) >= 0 ? Status.PASS : Status.FAIL; } else if (table != null && table.isPartitioned() && partsList != null) { List dataSizes = StatsUtils.getBasicStatForPartitions(table, partsList.getNotDeniedPartns(), StatsSetupConst.TOTAL_SIZE); @@ -541,10 +543,15 @@ private Status checkThresholdWithMetastoreStats(final Table table, final PrunedP return Status.UNAVAILABLE; } - return (threshold - totalDataSize) >= 0 ? Status.PASS : Status.FAIL; + status = (threshold - totalDataSize) >= 0 ? Status.PASS : Status.FAIL; } - return Status.UNAVAILABLE; + if (status == Status.PASS && MetaStoreUtils.isExternalTable(table.getTTable())) { + // External table should also check the underlying file size. + LOG.warn("Table {} is external table, falling back to filesystem scan.", table.getCompleteName()); + status = Status.UNAVAILABLE; + } + return status; } private long getPathLength(JobConf conf, Path path,