diff --git a/pom.xml b/pom.xml
index 880f2354e5..96920ff5f6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -194,7 +194,7 @@
1.7.10
4.0.4
3.0.0-SNAPSHOT
- 0.9.1-SNAPSHOT
+ 0.9.1
0.92.0-incubating
2.2.0
2.2.0
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index be93d19f60..09737fbb1b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -34,7 +34,6 @@
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;
-import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
@@ -367,73 +366,6 @@ public static boolean isOriginal(Footer footer) {
return result;
}
- /**
- * Generate the list of column ids to be read from the file.
- *
- * @param readerSchema schema of the file from which the read is to be performed
- * @param included list of top-level columns to be read
- * @param conf Hive conf used to access "hive.io.file.readNestedColumn.paths"
- * @return boolean array indicating which column ids need to be read.
- */
- private static boolean[] genIncludedColumns(Configuration conf, TypeDescription readerSchema,
- List<Integer> included) {
- boolean[] result = new boolean[readerSchema.getMaximumId() + 1];
- if (included != null) {
- /* Include only the nested columns that are present in conf "hive.io.file.readNestedColumn.paths" */
- Set<String> nestedColumnPaths = ColumnProjectionUtils.getNestedColumnPaths(conf);
- if (nestedColumnPaths.size() != 0) {
- result[0] = true;
- for (String column : nestedColumnPaths) {
- String[] columnPath = column.split("\\.");
- result = setIncludeForNestedColumns(columnPath, 0, readerSchema, result);
- }
- } else {
- /* This is a fail-safe in case we fail to obtain nested column paths correctly */
- result = genIncludedColumns(readerSchema, included);
- }
- } else {
- /* Included will be null in the select * scenario, hence mark all columns as true */
- Arrays.fill(result, true);
- }
- return result;
- }
-
- /**
- * Convert a column path to column ids and set the corresponding entries of the include boolean array to true.
- *
- * @param columnPath the column path split on ".", e.g. {"a", "b", "c"}
- * @param position index of the current columnPath segment.
- * @param readerSchema schema in which the column name is to be searched.
- * @param include boolean array indicating which columns need to be read from the file.
- * @return filled "include" boolean array.
- */
- private static boolean[] setIncludeForNestedColumns(String[] columnPath, int position,
- TypeDescription readerSchema, boolean[] include) {
- if (position == (columnPath.length) && readerSchema.getChildren() != null) {
- /* If the column path is "a.b.c" and c is a nested structure, set true for all of its child columns. */
- for (int col = readerSchema.getId(); col <= readerSchema.getMaximumId(); ++col) {
- include[col] = true;
- }
- } else if (position == (columnPath.length) && readerSchema.getChildren() == null) {
- /* If the column path is "a.b.c" and c is a leaf column, set true for column c. */
- include[readerSchema.getId()] = true;
- } else {
- /*
- * If the column path is "a.b.c",
- * set true for the a, b and c columns in a depth-first fashion.
- */
- int fieldId = 0;
- String columnName = columnPath[position];
- while (!columnName.equalsIgnoreCase(readerSchema.getFieldNames().get(fieldId))) {
- fieldId++;
- }
- TypeDescription childSchema = readerSchema.getChildren().get(fieldId);
- include = setIncludeForNestedColumns(columnPath, ++position, childSchema, include);
- include[childSchema.getId()] = true;
- }
- return include;
- }
-
/**
* Reverses genIncludedColumns; produces the table column indexes from the ORC included columns.
* @param readerSchema The ORC reader schema for the table.
@@ -480,9 +412,9 @@ public static boolean isOriginal(Footer footer) {
*/
static boolean[] genIncludedColumns(TypeDescription readerSchema,
Configuration conf) {
- if (!ColumnProjectionUtils.isReadAllColumns(conf)) {
+ if (!ColumnProjectionUtils.isReadAllColumns(conf)) {
List<Integer> included = ColumnProjectionUtils.getReadColumnIDs(conf);
- return genIncludedColumns(conf, readerSchema, included);
+ return genIncludedColumns(readerSchema, included);
} else {
return null;
}
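
For context on the hunk above, the following is a minimal, standalone sketch of the nested-column include logic being removed, assuming only the org.apache.orc TypeDescription API; the NestedIncludeSketch class and markNestedPath method are illustrative names, not Hive code, and the not-found guard is an addition for safety.

import java.util.List;
import org.apache.orc.TypeDescription;

public class NestedIncludeSketch {
  // Mark every column id along a path such as "a.b.c" (and, if the leaf is a
  // struct, all of its children) as true in the include array.
  static void markNestedPath(String[] path, int pos, TypeDescription schema,
                             boolean[] include) {
    if (pos == path.length) {
      // Leaf of the requested path: include this column and its whole subtree.
      for (int col = schema.getId(); col <= schema.getMaximumId(); ++col) {
        include[col] = true;
      }
      return;
    }
    List<String> fieldNames = schema.getFieldNames();
    int fieldId = 0;
    while (fieldId < fieldNames.size()
        && !path[pos].equalsIgnoreCase(fieldNames.get(fieldId))) {
      fieldId++;
    }
    if (fieldId == fieldNames.size()) {
      return; // path segment not found in the schema; nothing to mark
    }
    TypeDescription child = schema.getChildren().get(fieldId);
    include[child.getId()] = true;
    markNestedPath(path, pos + 1, child, include);
  }

  public static void main(String[] args) {
    TypeDescription schema = TypeDescription.fromString(
        "struct<a:int,b:struct<c:int,e:int>,d:string>");
    boolean[] include = new boolean[schema.getMaximumId() + 1];
    include[0] = true; // root struct
    markNestedPath("b.c".split("\\."), 0, schema, include);
    // Expected: root, b and b.c are included; a, b.e and d are not.
    System.out.println(java.util.Arrays.toString(include));
  }
}

After this patch, OrcInputFormat no longer performs this per-path pruning and falls back to the top-level genIncludedColumns(readerSchema, included) shown in the hunk above.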
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index aaefafaa61..cec1d34535 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -1706,80 +1706,6 @@ public void testProjectedColumnSize() throws Exception {
assertEquals(376804, result.getProjectedColumnsUncompressedSize());
}
-
- @Test
- public void testProjectedColumnSizeWithNestedConf() throws Exception {
- long[] stripeSizes =
- new long[]{200, 200, 200, 200, 100};
- MockFileSystem fs = new MockFileSystem(conf,
- new MockFile("mock:/a/file", 500,
- createMockOrcFile(stripeSizes),
- new MockBlock("host1-1", "host1-2", "host1-3"),
- new MockBlock("host2-1", "host0", "host2-3"),
- new MockBlock("host0", "host3-2", "host3-3"),
- new MockBlock("host4-1", "host4-2", "host4-3"),
- new MockBlock("host5-1", "host5-2", "host5-3")));
- HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 300);
- HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 200);
- conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
- //conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
- conf.set(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR, "col1");
- OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
- OrcInputFormat.SplitGenerator splitter =
- new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs,
- fs.getFileStatus(new Path("/a/file")), null, null, true,
- new ArrayList(), true, null, null), null, true, true);
- List<OrcSplit> results = splitter.call();
- OrcSplit result = results.get(0);
- assertEquals(3, results.size());
- assertEquals(3, result.getStart());
- assertEquals(400, result.getLength());
- assertEquals(167468, result.getProjectedColumnsUncompressedSize());
- result = results.get(1);
- assertEquals(403, result.getStart());
- assertEquals(400, result.getLength());
- assertEquals(167468, result.getProjectedColumnsUncompressedSize());
- result = results.get(2);
- assertEquals(803, result.getStart());
- assertEquals(100, result.getLength());
- assertEquals(41867, result.getProjectedColumnsUncompressedSize());
-
- // test min = 0, max = 0 generates each stripe
- HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 0);
- HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 0);
- context = new OrcInputFormat.Context(conf);
- splitter = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs,
- fs.getFileStatus(new Path("/a/file")), null, null, true,
- new ArrayList(),
- true, null, null), null, true, true);
- results = splitter.call();
- assertEquals(5, results.size());
- for (int i = 0; i < stripeSizes.length; ++i) {
- assertEquals("checking stripe " + i + " size",
- stripeSizes[i], results.get(i).getLength());
- if (i == stripeSizes.length - 1) {
- assertEquals(41867, results.get(i).getProjectedColumnsUncompressedSize());
- } else {
- assertEquals(83734, results.get(i).getProjectedColumnsUncompressedSize());
- }
- }
-
- // single split
- HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 1000);
- HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 100000);
- context = new OrcInputFormat.Context(conf);
- splitter = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs,
- fs.getFileStatus(new Path("/a/file")), null, null, true,
- new ArrayList(),
- true, null, null), null, true, true);
- results = splitter.call();
- assertEquals(1, results.size());
- result = results.get(0);
- assertEquals(3, result.getStart());
- assertEquals(900, result.getLength());
- assertEquals(376804, result.getProjectedColumnsUncompressedSize());
- }
-
@Test
public void testInOutFormat() throws Exception {
Properties properties = new Properties();
@@ -4024,115 +3950,4 @@ public void testColumnProjectionWithAcid() throws Exception {
assertEquals(1000, record);
reader.close();
}
-
- @Test
- public void testColumnProjectionWithAcidWithNestedConf() throws Exception {
- Path baseDir = new Path(workDir, "base_00100");
- testFilePath = new Path(baseDir, "bucket_00000");
- fs.mkdirs(baseDir);
- fs.delete(testFilePath, true);
- TypeDescription fileSchema =
- TypeDescription.fromString("struct<operation:int,originalTransaction:bigint,bucket:int,rowId:bigint,currentTransaction:bigint,row:struct<a:int,b:struct<c:int>,d:string>>");
- Writer writer = OrcFile.createWriter(testFilePath,
- OrcFile.writerOptions(conf)
- .fileSystem(fs)
- .setSchema(fileSchema)
- .compress(org.apache.orc.CompressionKind.NONE));
- VectorizedRowBatch batch = fileSchema.createRowBatch(1000);
- batch.size = 1000;
- StructColumnVector scv = (StructColumnVector)batch.cols[5];
- // operation
- batch.cols[0].isRepeating = true;
- ((LongColumnVector) batch.cols[0]).vector[0] = 0;
- // original transaction
- batch.cols[1].isRepeating = true;
- ((LongColumnVector) batch.cols[1]).vector[0] = 1;
- // bucket
- batch.cols[2].isRepeating = true;
- ((LongColumnVector) batch.cols[2]).vector[0] = 0;
- // current transaction
- batch.cols[4].isRepeating = true;
- ((LongColumnVector) batch.cols[4]).vector[0] = 1;
-
- LongColumnVector lcv = (LongColumnVector)
- ((StructColumnVector) scv.fields[1]).fields[0];
- for(int r=0; r < 1000; r++) {
- // row id
- ((LongColumnVector) batch.cols[3]).vector[r] = r;
- // a
- ((LongColumnVector) scv.fields[0]).vector[r] = r * 42;
- // b.c
- lcv.vector[r] = r * 10001;
- // d
- ((BytesColumnVector) scv.fields[2]).setVal(r,
- Integer.toHexString(r).getBytes(StandardCharsets.UTF_8));
- }
- writer.addRowBatch(batch);
- writer.addUserMetadata(OrcRecordUpdater.ACID_KEY_INDEX_NAME,
- ByteBuffer.wrap("0,0,999".getBytes(StandardCharsets.UTF_8)));
- writer.close();
- long fileLength = fs.getFileStatus(testFilePath).getLen();
-
- // test with same schema with include
- conf.set(ValidTxnList.VALID_TXNS_KEY, "100:99:");
- conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "a,b,d");
- conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "int,struct<c:int>,string");
- conf.set(ColumnProjectionUtils.READ_ALL_COLUMNS, "false");
- conf.set(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR, "a,d");
-// conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2");
- OrcSplit split = new OrcSplit(testFilePath, null, 0, fileLength,
- new String[0], null, false, true,
- new ArrayList(), fileLength, fileLength, workDir);
- OrcInputFormat inputFormat = new OrcInputFormat();
- AcidInputFormat.RowReader reader = inputFormat.getReader(split,
- new AcidInputFormat.Options(conf));
- int record = 0;
- RecordIdentifier id = reader.createKey();
- OrcStruct struct = reader.createValue();
- while (reader.next(id, struct)) {
- assertEquals("id " + record, record, id.getRowId());
- assertEquals("bucket " + record, 0, id.getBucketProperty());
- assertEquals("trans " + record, 1, id.getTransactionId());
- assertEquals("a " + record,
- 42 * record, ((IntWritable) struct.getFieldValue(0)).get());
- assertEquals(null, struct.getFieldValue(1));
- assertEquals("d " + record,
- Integer.toHexString(record), struct.getFieldValue(2).toString());
- record += 1;
- }
- assertEquals(1000, record);
- reader.close();
-
- // test with schema evolution and include
- conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "a,b,d,f");
- conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "int,struct<c:int>,string,int");
- conf.set(ColumnProjectionUtils.READ_ALL_COLUMNS, "false");
-// conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2,3");
- conf.set(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR, "a,d,f");
- split = new OrcSplit(testFilePath, null, 0, fileLength,
- new String[0], null, false, true,
- new ArrayList(), fileLength, fileLength, workDir);
- inputFormat = new OrcInputFormat();
- reader = inputFormat.getReader(split, new AcidInputFormat.Options(conf));
- record = 0;
- id = reader.createKey();
- struct = reader.createValue();
- while (reader.next(id, struct)) {
- assertEquals("id " + record, record, id.getRowId());
- assertEquals("bucket " + record, 0, id.getBucketProperty());
- assertEquals("trans " + record, 1, id.getTransactionId());
- assertEquals("a " + record,
- 42 * record, ((IntWritable) struct.getFieldValue(0)).get());
- assertEquals(null, struct.getFieldValue(1));
- assertEquals("d " + record,
- Integer.toHexString(record), struct.getFieldValue(2).toString());
- assertEquals("f " + record, null, struct.getFieldValue(3));
- record += 1;
- }
- assertEquals(1000, record);
- reader.close();
- }
}
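
The removed tests above drove the nested-column projection end to end through job configuration. As a rough sketch of how those knobs were set (assuming only the ColumnProjectionUtils constants already referenced in the removed test code), a configuration looked roughly like the following; after this patch only the top-level READ_COLUMN_IDS_CONF_STR projection is consulted by OrcInputFormat.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

public class NestedProjectionConfSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Turn off "read all columns" so the projection settings below take effect.
    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
    // Top-level projection: column ids 0 and 2 of the row (a and d).
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2");
    // Nested projection ("hive.io.file.readNestedColumn.paths"), read by the
    // code this patch removes; no longer consulted by OrcInputFormat afterwards.
    conf.set(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR, "a,d");
  }
}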