diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 8c39de3..61b4963 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3898,7 +3898,8 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         "is unneeded. This is only necessary for ORC files written before HIVE-9660."),
     LLAP_IO_USE_FILEID_PATH("hive.llap.io.use.fileid.path", true,
         "Whether LLAP should use fileId (inode)-based path to ensure better consistency for the\n" +
-        "cases of file overwrites. This is supported on HDFS."),
+        "cases of file overwrites. This is supported on HDFS. Disabling this also turns off any\n" +
+        "cache consistency checks based on fileId comparisons."),
     // Restricted to text for now as this is a new feature; only text files can be sliced.
     LLAP_IO_ENCODE_ENABLED("hive.llap.io.encode.enabled", true,
         "Whether LLAP should try to re-encode and cache data for non-ORC formats. This is used\n" +
diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
index e8a3b40..4f5b0a9 100644
--- llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
+++ llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
@@ -214,7 +214,9 @@ public OrcEncodedDataReader(LowLevelCache lowLevelCache, BufferUsageManager buff
 
     fs = split.getPath().getFileSystem(jobConf);
     fileKey = determineFileId(fs, split,
         HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID),
-        HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID));
+        HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID),
+        !HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_IO_USE_FILEID_PATH)
+        );
     fileMetadata = getFileFooterFromCacheOrDisk();
     final TypeDescription fileSchema = fileMetadata.getSchema();
@@ -464,7 +466,7 @@ private boolean processStop() {
   }
 
   private static Object determineFileId(FileSystem fs, FileSplit split,
-      boolean allowSynthetic, boolean checkDefaultFs) throws IOException {
+      boolean allowSynthetic, boolean checkDefaultFs, boolean forceSynthetic) throws IOException {
     if (split instanceof OrcSplit) {
       Object fileKey = ((OrcSplit)split).getFileKey();
       if (fileKey != null) {
@@ -472,7 +474,7 @@ private static Object determineFileId(FileSystem fs, FileSplit split,
       }
     }
     LOG.warn("Split for " + split.getPath() + " (" + split.getClass() + ") does not have file ID");
-    return HdfsUtils.getFileId(fs, split.getPath(), allowSynthetic, checkDefaultFs);
+    return HdfsUtils.getFileId(fs, split.getPath(), allowSynthetic, checkDefaultFs, forceSynthetic);
   }
 
   /**
diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
index 2576175..658bc7d 100644
--- llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
+++ llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
@@ -216,7 +216,8 @@ public MemoryBuffer create() {
     fs = split.getPath().getFileSystem(daemonConf);
     fileKey = determineFileId(fs, split,
         HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID),
-        HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID));
+        HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID),
+        !HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_IO_USE_FILEID_PATH));
     cacheTag = HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_TRACK_CACHE_USAGE)
         ? LlapUtil.getDbAndTableNameForMetrics(split.getPath(), true) : null;
     this.sourceInputFormat = sourceInputFormat;
@@ -1698,12 +1699,12 @@ private boolean processStop() {
   }
 
   private static Object determineFileId(FileSystem fs, FileSplit split,
-      boolean allowSynthetic, boolean checkDefaultFs) throws IOException {
+      boolean allowSynthetic, boolean checkDefaultFs, boolean forceSynthetic) throws IOException {
     /* TODO: support this optionally? this is not OrcSplit, but we could add a custom split.
     Object fileKey = ((OrcSplit)split).getFileKey();
     if (fileKey != null) return fileKey; */
     LlapIoImpl.LOG.warn("Split for " + split.getPath() + " (" + split.getClass() + ") does not have file ID");
-    return HdfsUtils.getFileId(fs, split.getPath(), allowSynthetic, checkDefaultFs);
+    return HdfsUtils.getFileId(fs, split.getPath(), allowSynthetic, checkDefaultFs, forceSynthetic);
   }
 
   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
index 1158b52..3482cfc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
@@ -45,8 +45,8 @@ private static final Logger LOG = LoggerFactory.getLogger(HdfsUtils.class);
 
   public static Object getFileId(FileSystem fileSystem, Path path,
-      boolean allowSynthetic, boolean checkDefaultFs) throws IOException {
-    if (fileSystem instanceof DistributedFileSystem) {
+      boolean allowSynthetic, boolean checkDefaultFs, boolean forceSyntheticIds) throws IOException {
+    if (!forceSyntheticIds && fileSystem instanceof DistributedFileSystem) {
       DistributedFileSystem dfs = (DistributedFileSystem) fileSystem;
       if ((!checkDefaultFs) || isDefaultFs(dfs)) {
         Object result = SHIMS.getFileId(dfs, path.toUri().getPath());
         if (result != null) return result;
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index f34f393..1268521 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -2267,8 +2267,11 @@ private static boolean isStripeSatisfyPredicate(
 
       boolean checkDefaultFs = HiveConf.getBoolVar(
           context.conf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID);
-      boolean isDefaultFs = (!checkDefaultFs) || ((fs instanceof DistributedFileSystem)
-          && HdfsUtils.isDefaultFs((DistributedFileSystem) fs));
+      boolean forceSynthetic =
+          !HiveConf.getBoolVar(context.conf, ConfVars.LLAP_IO_USE_FILEID_PATH);
+      // If forceSynthetic is set, assume this is not the default FS.
+      boolean isDefaultFs = !forceSynthetic && ((!checkDefaultFs) || ((fs instanceof DistributedFileSystem)
+          && HdfsUtils.isDefaultFs((DistributedFileSystem) fs)));
 
       if (baseFiles.isEmpty()) {
         assert false : "acid 2.0 no base?!: " + dir;
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
index 8c49056..fd776cf 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
@@ -193,7 +193,8 @@ public void initialize(
     if (metadataCache != null) {
       cacheKey = HdfsUtils.getFileId(file.getFileSystem(configuration), file,
           HiveConf.getBoolVar(cacheConf, ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID),
-          HiveConf.getBoolVar(cacheConf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID));
+          HiveConf.getBoolVar(cacheConf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID),
+          !HiveConf.getBoolVar(cacheConf, ConfVars.LLAP_IO_USE_FILEID_PATH));
     }
     if (cacheKey != null) {
       if (HiveConf.getBoolVar(cacheConf, ConfVars.LLAP_TRACK_CACHE_USAGE)) {
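
Note for reviewers: every call site in this patch follows the same pattern. The new
forceSynthetic argument is derived by negating hive.llap.io.use.fileid.path and threaded
through to HdfsUtils.getFileId(), which skips the DistributedFileSystem inode lookup
entirely when the flag is set. A minimal sketch of that call pattern, outside the patch
(the resolveFileKey() wrapper and its class are illustrative only, not part of the change):

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
    import org.apache.hadoop.hive.ql.io.HdfsUtils;

    public class FileIdCallPattern {
      // Illustrative wrapper: shows how each reader derives the new
      // forceSynthetic argument from the daemon configuration.
      static Object resolveFileKey(Configuration daemonConf, Path path) throws IOException {
        FileSystem fs = path.getFileSystem(daemonConf);
        boolean allowSynthetic =
            HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID);
        boolean checkDefaultFs =
            HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID);
        // Disabling hive.llap.io.use.fileid.path forces synthetic (non-inode) IDs,
        // which also disables fileId-based cache consistency checks.
        boolean forceSynthetic =
            !HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_IO_USE_FILEID_PATH);
        return HdfsUtils.getFileId(fs, path, allowSynthetic, checkDefaultFs, forceSynthetic);
      }
    }

With forceSynthetic set, getFileId() can only return an ID from its synthetic, non-inode
branches, so cache entries are keyed without HDFS inode information; that is why the updated
hive.llap.io.use.fileid.path description warns that fileId-based consistency checks are
turned off as well.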