emptyBuckets = Utilities.removeTempOrDuplicateFiles(fs, finalResults,
- unionSuffix, dpLevels, mbc == null ? 0 : mbc.numBuckets, hconf, txnId, stmtId,
+ unionSuffix, dpLevels, mbc == null ? 0 : mbc.numBuckets, hconf, writeId, stmtId,
isMmTable, null, isInsertOverwrite);
// create empty buckets if necessary
if (!emptyBuckets.isEmpty()) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index ff55f50..6588385 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -87,7 +87,7 @@
/**
* A record ID column is a virtual column, so it should be separated from normal data column
* processes. A recordIdColumnVector contains RecordIdentifier information in a
- * StructColumnVector. It has three LongColumnVectors as its fields; original transaction IDs,
+ * StructColumnVector. It has three LongColumnVectors as its fields; original write IDs,
* bucket IDs, and row IDs.
*/
private StructColumnVector recordIdColumnVector;
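For readers unfamiliar with this vector, a minimal sketch of how a record-ID struct vector with that field order could be assembled (illustrative only; the real field is initialized elsewhere in the class, and the batch size here is just the default):

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    // Three LongColumnVectors: original write id, bucket property, row id.
    LongColumnVector writeIds = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    LongColumnVector buckets = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    LongColumnVector rowIds = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    StructColumnVector recordId =
        new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, writeIds, buckets, rowIds);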
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java
index 1ed35b3..41007e2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java
@@ -56,9 +56,9 @@
* online transactions systems.
*
* The design changes the layout of data within a partition from being in files
- * at the top level to having base and delta directories. Each write operation
- * will be assigned a sequential global transaction id and each read operation
- * will request the list of valid transaction ids.
+ * at the top level to having base and delta directories. Each write operation in a table
+ * will be assigned a sequential table write id and each read operation
+ * will request the list of valid transactions/write ids.
*
*
* With each new write operation a new delta directory is created with events
* that correspond to inserted, updated, or deleted rows. Each of the files is
- * stored sorted by the original transaction id (ascending), bucket (ascending),
- * row id (ascending), and current transaction id (descending). Thus the files
+ * stored sorted by the original write id (ascending), bucket (ascending),
+ * row id (ascending), and current write id (descending). Thus the files
* can be merged by advancing through the files in parallel.
* The stid is the unique id (within the transaction) of the statement that created
* this delta file.
*
* The base files include all transactions from the beginning of time
- * (transaction id 0) to the transaction in the directory name. Delta
- * directories include transactions (inclusive) between the two transaction ids.
+ * (write id 0) to the write id in the directory name. Delta
+ * directories include transactions (inclusive) between the two write ids.
*
- * Because read operations get the list of valid transactions when they start,
+ * Because read operations get the list of valid transactions/write ids when they start,
* all reads are performed on that snapshot, regardless of any transactions that
* are committed afterwards.
*
- * The base and the delta directories have the transaction ids so that major
+ * The base and the delta directories have the write ids so that major
* (merge all deltas into the base) and minor (merge several deltas together)
* compactions can happen while readers continue their processing.
*
@@ -204,7 +204,7 @@ public Reporter getReporter() {
/**
* Get a record reader that provides the user-facing view of the data after
* it has been merged together. The key provides information about the
- * record's identifier (transaction, bucket, record id).
+ * record's identifier (write id, bucket, record id).
* @param split the split to read
* @param options the options to read with
* @return a record reader
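The merge order described in the javadoc above (original write id ascending, bucket ascending, row id ascending, current write id descending) can be illustrated with a comparator; AcidEventKey and its fields are hypothetical stand-ins, not classes from this patch:

    import java.util.Comparator;

    // Hypothetical holder for one ACID event's key fields.
    class AcidEventKey {
      long originalWriteId;
      int bucket;
      long rowId;
      long currentWriteId;
    }

    Comparator<AcidEventKey> mergeOrder =
        Comparator.<AcidEventKey>comparingLong(k -> k.originalWriteId)
            .thenComparingInt(k -> k.bucket)
            .thenComparingLong(k -> k.rowId)
            // the latest version of a row sorts first within the same (owid, bucket, rowId)
            .thenComparing(Comparator.<AcidEventKey>comparingLong(k -> k.currentWriteId).reversed());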
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index 8dc1e8a..ced84b3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -623,7 +623,7 @@ public String toString() {
/**
* Get the list of obsolete directories. After filtering out bases and
- * deltas that are not selected by the valid transaction list, return the
+ * deltas that are not selected by the valid transaction/write ids list, return the
* list of original files, bases, and deltas that have been replaced by
* more up to date ones. Not {@code null}.
*/
@@ -695,7 +695,7 @@ public boolean isRawFormat() {
/**
* Compactions (Major/Minor) merge deltas/bases but delete of old files
* happens in a different process; thus it's possible to have bases/deltas with
- * overlapping txnId boundaries. The sort order helps figure out the "best" set of files
+ * overlapping writeId boundaries. The sort order helps figure out the "best" set of files
* to use to get data.
* This sorts "wider" delta before "narrower" i.e. delta_5_20 sorts before delta_5_10 (and delta_11_20)
*/
@@ -718,7 +718,7 @@ else if(statementId != parsedDelta.statementId) {
/**
* We want deltas after minor compaction (w/o statementId) to sort
* earlier so that getAcidState() considers compacted files (into larger ones) obsolete
- * Before compaction, include deltas with all statementIds for a given txnId
+ * Before compaction, include deltas with all statementIds for a given writeId
* in a {@link org.apache.hadoop.hive.ql.io.AcidUtils.Directory}
*/
if(statementId < parsedDelta.statementId) {
@@ -749,9 +749,9 @@ else if(statementId != parsedDelta.statementId) {
/**
* Convert the list of deltas into an equivalent list of begin/end
- * transaction id pairs. Assumes {@code deltas} is sorted.
+ * write id pairs. Assumes {@code deltas} is sorted.
* @param deltas
- * @return the list of transaction ids to serialize
+ * @return the list of write ids to serialize
*/
public static List<AcidInputFormat.DeltaMetaData> serializeDeltas(List<ParsedDelta> deltas) {
List<AcidInputFormat.DeltaMetaData> result = new ArrayList<>(deltas.size());
@@ -774,12 +774,12 @@ else if(statementId != parsedDelta.statementId) {
}
/**
- * Convert the list of begin/end transaction id pairs to a list of delete delta
+ * Convert the list of begin/end write id pairs to a list of delete delta
* directories. Note that there may be multiple delete_delta files for the exact same txn range starting
* with 2.2.x;
* see {@link org.apache.hadoop.hive.ql.io.AcidUtils#deltaSubdir(long, long, int)}
* @param root the root directory
- * @param deleteDeltas list of begin/end transaction id pairs
+ * @param deleteDeltas list of begin/end write id pairs
* @return the list of delta paths
*/
public static Path[] deserializeDeleteDeltas(Path root, final List<AcidInputFormat.DeltaMetaData> deleteDeltas) throws IOException {
@@ -879,7 +879,7 @@ public static Directory getAcidState(Path directory,
* Get the ACID state of the given directory. It finds the minimal set of
* base and diff directories. Note that because major compactions don't
* preserve the history, we can't use a base directory that includes a
- * transaction id that we must exclude.
+ * write id that we must exclude.
* @param directory the partition directory to analyze
* @param conf the configuration
* @param writeIdList the list of write ids that we are reading
@@ -1075,7 +1075,7 @@ public boolean isBaseInRawFormat() {
* files within the snapshot.
* A base produced by Insert Overwrite is different. Logically it's a delta file but one that
* causes anything written previously to be ignored (hence the overwrite). In this case, base_x
- * is visible if txnid:x is committed for current reader.
+ * is visible if writeid:x is committed for current reader.
*/
private static boolean isValidBase(long baseWriteId, ValidWriteIdList writeIdList, Path baseDir,
FileSystem fs) throws IOException {
@@ -1645,7 +1645,7 @@ public static boolean isRawFormat(Path baseOrDeltaDir, FileSystem fs) throws IOE
try {
Reader reader = OrcFile.createReader(dataFile, OrcFile.readerOptions(fs.getConf()));
/*
- acid file would have schema like <op, otid, writerId, rowid, ctid, <f1, ... fn>> so could
+ acid file would have schema like <op, owid, writerId, rowid, cwid, <f1, ... fn>> so could
check it this way once/if OrcRecordUpdater.ACID_KEY_INDEX_NAME is removed
TypeDescription schema = reader.getSchema();
List<String> columns = schema.getFieldNames();
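As a rough illustration of the ordering discussed above (delta_5_20 sorts ahead of delta_5_10 and delta_11_20 so that the "widest" compacted delta wins), a simplified, hypothetical comparator over parsed delta directory names might look like this; the real logic lives in ParsedDelta.compareTo and also breaks ties on statementId:

    // Simplified sketch: min write id ascending, then max write id descending.
    static final class DeltaRange implements Comparable<DeltaRange> {
      final long minWriteId;
      final long maxWriteId;
      DeltaRange(String dirName) {               // e.g. "delta_0000005_0000020"
        String[] parts = dirName.split("_");
        this.minWriteId = Long.parseLong(parts[1]);
        this.maxWriteId = Long.parseLong(parts[2]);
      }
      @Override
      public int compareTo(DeltaRange other) {
        if (minWriteId != other.minWriteId) {
          return Long.compare(minWriteId, other.minWriteId);
        }
        return Long.compare(other.maxWriteId, maxWriteId);  // wider range sorts first
      }
    }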
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/RecordIdentifier.java b/ql/src/java/org/apache/hadoop/hive/ql/io/RecordIdentifier.java
index 1f673da..607abfd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/RecordIdentifier.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/RecordIdentifier.java
@@ -45,7 +45,7 @@
*/
public enum Field {
//note the enum names match field names in the struct
- transactionId(TypeInfoFactory.longTypeInfo,
+ writeId(TypeInfoFactory.longTypeInfo,
PrimitiveObjectInspectorFactory.javaLongObjectInspector),
bucketId(TypeInfoFactory.intTypeInfo, PrimitiveObjectInspectorFactory.javaIntObjectInspector),
rowId(TypeInfoFactory.longTypeInfo, PrimitiveObjectInspectorFactory.javaLongObjectInspector);
@@ -88,13 +88,13 @@ public static void toArray(RecordIdentifier ri, Object[] struct) {
Arrays.fill(struct, null);
return;
}
- struct[Field.transactionId.ordinal()] = ri.getWriteId();
+ struct[Field.writeId.ordinal()] = ri.getWriteId();
struct[Field.bucketId.ordinal()] = ri.getBucketProperty();
struct[Field.rowId.ordinal()] = ri.getRowId();
}
}
- private long transactionId;
+ private long writeId;
private int bucketId;
private long rowId;
@@ -102,7 +102,7 @@ public RecordIdentifier() {
}
public RecordIdentifier(long writeId, int bucket, long rowId) {
- this.transactionId = writeId;
+ this.writeId = writeId;
this.bucketId = bucket;
this.rowId = rowId;
}
@@ -114,7 +114,7 @@ public RecordIdentifier(long writeId, int bucket, long rowId) {
* @param rowId the row id
*/
public void setValues(long writeId, int bucketId, long rowId) {
- this.transactionId = writeId;
+ this.writeId = writeId;
this.bucketId = bucketId;
this.rowId = rowId;
}
@@ -124,7 +124,7 @@ public void setValues(long writeId, int bucketId, long rowId) {
* @param other the object to copy from
*/
public void set(RecordIdentifier other) {
- this.transactionId = other.transactionId;
+ this.writeId = other.writeId;
this.bucketId = other.bucketId;
this.rowId = other.rowId;
}
@@ -138,7 +138,7 @@ public void setRowId(long rowId) {
* @return the write id
*/
public long getWriteId() {
- return transactionId;
+ return writeId;
}
/**
@@ -161,8 +161,8 @@ protected int compareToInternal(RecordIdentifier other) {
if (other == null) {
return -1;
}
- if (transactionId != other.transactionId) {
- return transactionId < other.transactionId ? -1 : 1;
+ if (writeId != other.writeId) {
+ return writeId < other.writeId ? -1 : 1;
}
if (bucketId != other.bucketId) {
return bucketId < other.bucketId ? - 1 : 1;
@@ -183,14 +183,14 @@ public int compareTo(RecordIdentifier other) {
@Override
public void write(DataOutput dataOutput) throws IOException {
- dataOutput.writeLong(transactionId);
+ dataOutput.writeLong(writeId);
dataOutput.writeInt(bucketId);
dataOutput.writeLong(rowId);
}
@Override
public void readFields(DataInput dataInput) throws IOException {
- transactionId = dataInput.readLong();
+ writeId = dataInput.readLong();
bucketId = dataInput.readInt();
rowId = dataInput.readLong();
}
@@ -204,14 +204,14 @@ public boolean equals(Object other) {
return false;
}
RecordIdentifier oth = (RecordIdentifier) other;
- return oth.transactionId == transactionId &&
+ return oth.writeId == writeId &&
oth.bucketId == bucketId &&
oth.rowId == rowId;
}
@Override
public int hashCode() {
int result = 17;
- result = 31 * result + (int)(transactionId ^ (transactionId >>> 32));
+ result = 31 * result + (int)(writeId ^ (writeId >>> 32));
result = 31 * result + bucketId;
result = 31 * result + (int)(rowId ^ (rowId >>> 32));
return result;
@@ -223,7 +223,7 @@ public String toString() {
BucketCodec.determineVersion(bucketId);
String s = "(" + codec.getVersion() + "." + codec.decodeWriterId(bucketId) +
"." + codec.decodeStatementId(bucketId) + ")";
- return "{originalWriteId: " + transactionId + ", " + bucketToString() + ", row: " + getRowId() +"}";
+ return "{originalWriteId: " + writeId + ", " + bucketToString() + ", row: " + getRowId() +"}";
}
protected String bucketToString() {
BucketCodec codec =
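A short usage sketch of the renamed class, using only the constructor and methods visible in this patch (the values are arbitrary):

    import org.apache.hadoop.hive.ql.io.RecordIdentifier;

    RecordIdentifier first = new RecordIdentifier(7L, 1, 0L);  // writeId, bucket property, rowId
    RecordIdentifier second = new RecordIdentifier();
    second.setValues(7L, 1, 1L);

    assert first.getWriteId() == 7L;
    assert first.compareTo(second) < 0;  // same write id and bucket, smaller row id sorts first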
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index cf0d013..fe109d7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -149,8 +149,8 @@
* }
*
* Each AcidEvent object corresponds to an update event. The
- * originalTransaction, bucket, and rowId are the unique identifier for the row.
- * The operation and currentTransaction are the operation and the transaction
+ * originalWriteId, bucket, and rowId are the unique identifier for the row.
+ * The operation and currentWriteId are the operation and the table write id within current txn
* that added this event. Insert and update events include the entire row, while
* delete events have null for row.
*/
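A hedged sketch of how one insert event carries these fields, mirroring the setFieldValue calls that appear later in this patch in OrcRawRecordMerger; the six-column OrcStruct constructor argument and the placeholder values are assumptions made for illustration:

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;

    // placeholder values for illustration
    long writeId = 7L;
    int bucketProperty = 536870912;    // encoded bucket, see BucketCodec
    long rowId = 0L;
    OrcStruct row = new OrcStruct(2);  // the user columns <f1, ... fn>

    OrcStruct event = new OrcStruct(6);  // op, originalWriteId, bucket, rowId, currentWriteId, row
    event.setFieldValue(OrcRecordUpdater.OPERATION, new IntWritable(OrcRecordUpdater.INSERT_OPERATION));
    event.setFieldValue(OrcRecordUpdater.ORIGINAL_WRITEID, new LongWritable(writeId));
    event.setFieldValue(OrcRecordUpdater.CURRENT_WRITEID, new LongWritable(writeId));
    event.setFieldValue(OrcRecordUpdater.BUCKET, new IntWritable(bucketProperty));
    event.setFieldValue(OrcRecordUpdater.ROW_ID, new LongWritable(rowId));
    event.setFieldValue(OrcRecordUpdater.ROW, row);  // whole row for insert/update, null for delete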
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java
index 4059c53..5655ee9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java
@@ -131,7 +131,7 @@ public int compareTo(RecordIdentifier other) {
}
if(isDeleteEvent != oth.isDeleteEvent) {
//this is to break a tie if insert + delete of a given row is done within the same
- //txn (so that currentTransactionId is the same for both events) and we want the
+ //txn (so that currentWriteId is the same for both events) and we want the
//delete event to sort 1st since it needs to be sent up so that
// OrcInputFormat.getReader(InputSplit inputSplit, Options options) can skip it.
return isDeleteEvent ? -1 : +1;
@@ -330,9 +330,9 @@ public void next(OrcStruct next) throws IOException {
final int bucketId;
final int bucketProperty;
/**
- * TransactionId to use when generating synthetic ROW_IDs
+ * Write Id to use when generating synthetic ROW_IDs
*/
- final long transactionId;
+ final long writeId;
/**
* @param statementId - this should be from delta_x_y_stmtId file name. Imagine 2 load data
* statements in 1 txn. The stmtId will be embedded in
@@ -344,7 +344,7 @@ public void next(OrcStruct next) throws IOException {
this.bucketId = bucketId;
assert bucketId >= 0 : "don't support non-bucketed tables yet";
this.bucketProperty = encodeBucketId(conf, bucketId, statementId);
- transactionId = mergeOptions.getTransactionId();
+ writeId = mergeOptions.getWriteId();
}
@Override public final OrcStruct nextRecord() {
return nextRecord;
@@ -374,9 +374,9 @@ final boolean nextFromCurrentFile(OrcStruct next) throws IOException {
new IntWritable(OrcRecordUpdater.INSERT_OPERATION);
nextRecord().setFieldValue(OrcRecordUpdater.OPERATION, operation);
nextRecord().setFieldValue(OrcRecordUpdater.CURRENT_WRITEID,
- new LongWritable(transactionId));
+ new LongWritable(writeId));
nextRecord().setFieldValue(OrcRecordUpdater.ORIGINAL_WRITEID,
- new LongWritable(transactionId));
+ new LongWritable(writeId));
nextRecord().setFieldValue(OrcRecordUpdater.BUCKET,
new IntWritable(bucketProperty));
nextRecord().setFieldValue(OrcRecordUpdater.ROW_ID,
@@ -388,17 +388,17 @@ final boolean nextFromCurrentFile(OrcStruct next) throws IOException {
((IntWritable) next.getFieldValue(OrcRecordUpdater.OPERATION))
.set(OrcRecordUpdater.INSERT_OPERATION);
((LongWritable) next.getFieldValue(OrcRecordUpdater.ORIGINAL_WRITEID))
- .set(transactionId);
+ .set(writeId);
((IntWritable) next.getFieldValue(OrcRecordUpdater.BUCKET))
.set(bucketProperty);
((LongWritable) next.getFieldValue(OrcRecordUpdater.CURRENT_WRITEID))
- .set(transactionId);
+ .set(writeId);
((LongWritable) next.getFieldValue(OrcRecordUpdater.ROW_ID))
.set(nextRowId);
nextRecord().setFieldValue(OrcRecordUpdater.ROW,
getRecordReader().next(OrcRecordUpdater.getRow(next)));
}
- key.setValues(transactionId, bucketProperty, nextRowId, transactionId, false);
+ key.setValues(writeId, bucketProperty, nextRowId, writeId, false);
if (getMaxKey() != null && key.compareRow(getMaxKey()) > 0) {
if (LOG.isDebugEnabled()) {
LOG.debug("key " + key + " > maxkey " + getMaxKey());
@@ -498,7 +498,7 @@ static int encodeBucketId(Configuration conf, int bucketId, int statementId) {
* If this is not the 1st file, set minKey 1 less than the start of current file
* (Would not need to set minKey if we knew that there are no delta files)
* {@link #advanceToMinKey()} needs this */
- newMinKey = new RecordIdentifier(transactionId, bucketProperty,rowIdOffset - 1);
+ newMinKey = new RecordIdentifier(writeId, bucketProperty,rowIdOffset - 1);
}
if (maxKey != null) {
maxKey.setRowId(maxKey.getRowId() + rowIdOffset);
@@ -511,7 +511,7 @@ static int encodeBucketId(Configuration conf, int bucketId, int statementId) {
* of the file so we want to leave it blank to make sure any insert events in delta
* files are included; Conversely, if it's not the last file, set the maxKey so that
* events from deltas that don't modify anything in the current split are excluded*/
- newMaxKey = new RecordIdentifier(transactionId, bucketProperty,
+ newMaxKey = new RecordIdentifier(writeId, bucketProperty,
rowIdOffset + reader.getNumberOfRows() - 1);
}
this.minKey = newMinKey;
@@ -800,7 +800,7 @@ private KeyInterval discoverKeyBounds(Reader reader,
private Path baseDir;
private boolean isMajorCompaction = false;
private boolean isDeleteReader = false;
- private long transactionId = 0;
+ private long writeId = 0;
Options copyIndex(int copyIndex) {
assert copyIndex >= 0;
this.copyIndex = copyIndex;
@@ -829,8 +829,8 @@ Options isDeleteReader(boolean isDeleteReader) {
assert !isCompacting;
return this;
}
- Options transactionId(long transactionId) {
- this.transactionId = transactionId;
+ Options writeId(long writeId) {
+ this.writeId = writeId;
return this;
}
Options baseDir(Path baseDir) {
@@ -876,10 +876,10 @@ boolean isDeleteReader() {
* for reading "original" files - i.e. not native acid schema. Default value of 0 is
* appropriate for files that existed in a table before it was made transactional. 0 is the
* primordial transaction. For non-native files resulting from Load Data command, they
- * are located and base_x or delta_x_x and then transactionId == x.
+ * are located in base_x or delta_x_x and then writeId == x.
*/
- long getTransactionId() {
- return transactionId;
+ long getWriteId() {
+ return writeId;
}
/**
@@ -1158,7 +1158,7 @@ public Options clone() {
static final class TransactionMetaData {
final long syntheticWriteId;
/**
- * folder which determines the transaction id to use in synthetic ROW_IDs
+ * folder which determines the write id to use in synthetic ROW_IDs
*/
final Path folder;
final int statementId;
@@ -1175,7 +1175,7 @@ static TransactionMetaData findWriteIDForSynthetcRowIDs(Path splitPath, Path roo
Path parent = splitPath.getParent();
if(rootPath.equals(parent)) {
//the 'isOriginal' file is at the root of the partition (or table) thus it is
- //from a pre-acid conversion write and belongs to primordial txnid:0.
+ //from a pre-acid conversion write and belongs to primordial writeid:0.
return new TransactionMetaData(0, parent);
}
while(parent != null && !rootPath.equals(parent)) {
@@ -1199,7 +1199,7 @@ static TransactionMetaData findWriteIDForSynthetcRowIDs(Path splitPath, Path roo
if(parent == null) {
//split is marked isOriginal but it's not an immediate child of a partition nor is it in a
//base/ or delta/ - this should never happen
- throw new IllegalStateException("Cannot determine transaction id for original file "
+ throw new IllegalStateException("Cannot determine write id for original file "
+ splitPath + " in " + rootPath);
}
//"warehouse/t/HIVE_UNION_SUBDIR_15/000000_0" is a meaningful path for nonAcid2acid
@@ -1215,8 +1215,8 @@ static TransactionMetaData findWriteIDForSynthetcRowIDs(Path splitPath, Path roo
* in {@link AcidUtils.Directory#getOriginalFiles()}
* @return modified clone of {@code baseOptions}
*/
- private Options modifyForNonAcidSchemaRead(Options baseOptions, long transactionId, Path rootPath) {
- return baseOptions.clone().transactionId(transactionId).rootPath(rootPath);
+ private Options modifyForNonAcidSchemaRead(Options baseOptions, long writeId, Path rootPath) {
+ return baseOptions.clone().writeId(writeId).rootPath(rootPath);
}
/**
* This determines the set of {@link ReaderPairAcid} to create for a given delta/.
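To make the synthetic-ROW_ID rule concrete: per the comments above, an "original" (non-acid schema) file gets write id 0 if it sits at the partition/table root, and write id x if Load Data placed it under base_x or delta_x_x. A simplified, hypothetical helper (the real findWriteIDForSynthetcRowIDs also walks union subdirectories and tracks statementId):

    // Illustration only; not the patch's implementation.
    static long syntheticWriteIdFor(String dirName) {
      if (dirName.startsWith("base_")) {
        return Long.parseLong(dirName.substring("base_".length()));   // base_x -> x
      }
      if (dirName.startsWith("delta_")) {
        return Long.parseLong(dirName.split("_")[1]);                 // delta_x_x[_stmtId] -> x
      }
      return 0L;  // pre-acid-conversion file at the partition/table root -> primordial write id 0
    }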
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
index 77736ee..8caa265 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
@@ -230,11 +230,11 @@ private VectorizedOrcAcidRowBatchReader(JobConf conf, OrcSplit orcSplit, Reporte
private static final class OffsetAndBucketProperty {
private final long rowIdOffset;
private final int bucketProperty;
- private final long syntheticTxnId;
- private OffsetAndBucketProperty(long rowIdOffset, int bucketProperty, long syntheticTxnId) {
+ private final long syntheticWriteId;
+ private OffsetAndBucketProperty(long rowIdOffset, int bucketProperty, long syntheticWriteId) {
this.rowIdOffset = rowIdOffset;
this.bucketProperty = bucketProperty;
- this.syntheticTxnId = syntheticTxnId;
+ this.syntheticWriteId = syntheticWriteId;
}
}
/**
@@ -253,7 +253,7 @@ private OffsetAndBucketProperty computeOffsetAndBucket(
if (!needSyntheticRowIds(split.isOriginal(), !deleteEventRegistry.isEmpty(), rowIdProjected)) {
if(split.isOriginal()) {
/**
- * Even if we don't need to project ROW_IDs, we still need to check the transaction ID that
+ * Even if we don't need to project ROW_IDs, we still need to check the write ID that
* created the file to see if it's committed. See more in
* {@link #next(NullWritable, VectorizedRowBatch)}. (In practice getAcidState() should
* filter out base/delta files but this makes fewer dependencies)
@@ -352,7 +352,7 @@ private static boolean areRowIdsProjected(VectorizedRowBatchCtx rbCtx) {
/**
* There are 2 types of schema from the {@link #baseReader} that this handles. In the case
* the data was written to a transactional table from the start, every row is decorated with
- * transaction related info and looks like <op, otid, writerId, rowid, ctid, <f1, ... fn>>.
+ * transaction related info and looks like <op, owid, writerId, rowid, cwid, <f1, ... fn>>.
*
* The other case is when data was written to non-transactional table and thus only has the user
* data: <f1, ... fn>. Then this table was converted to a transactional table but the data
@@ -360,7 +360,7 @@ private static boolean areRowIdsProjected(VectorizedRowBatchCtx rbCtx) {
*
* In this case we may need to decorate the outgoing data with transactional column values at
* read time. (It's done somewhat out of band via VectorizedRowBatchCtx - ask Teddy Choi).
- * The "otid, writerId, rowid" columns represent {@link RecordIdentifier}. They are assigned
+ * The "owid, writerId, rowid" columns represent {@link RecordIdentifier}. They are assigned
* each time the table is read in a way that needs to project {@link VirtualColumn#ROWID}.
* Major compaction will attach these values to each row permanently.
* It's critical that these generated column values are assigned exactly the same way by each
@@ -420,8 +420,8 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti
// Handle synthetic row IDs for the original files.
innerRecordIdColumnVector = handleOriginalFile(selectedBitSet, innerRecordIdColumnVector);
} else {
- // Case 1- find rows which belong to transactions that are not valid.
- findRecordsWithInvalidTransactionIds(vectorizedRowBatchBase, selectedBitSet);
+ // Case 1- find rows which belong to write Ids that are not valid.
+ findRecordsWithInvalidWriteIds(vectorizedRowBatchBase, selectedBitSet);
}
// Case 2- find rows which have been deleted.
@@ -495,7 +495,7 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti
*/
recordIdColumnVector.fields[0].noNulls = true;
recordIdColumnVector.fields[0].isRepeating = true;
- ((LongColumnVector)recordIdColumnVector.fields[0]).vector[0] = syntheticProps.syntheticTxnId;
+ ((LongColumnVector)recordIdColumnVector.fields[0]).vector[0] = syntheticProps.syntheticWriteId;
/**
* This is {@link RecordIdentifier#getBucketProperty()}
* Also see {@link BucketCodec}
@@ -521,21 +521,21 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti
//these are insert events so (original txn == current) txn for all rows
innerRecordIdColumnVector[OrcRecordUpdater.CURRENT_WRITEID] = recordIdColumnVector.fields[0];
}
- if(syntheticProps.syntheticTxnId > 0) {
+ if(syntheticProps.syntheticWriteId > 0) {
//"originals" (written before table was converted to acid) is considered written by
- // txnid:0 which is always committed so there is no need to check wrt invalid transactions
+ // writeid:0 which is always committed so there is no need to check wrt invalid write Ids
//But originals written by Load Data for example can be in base_x or delta_x_x so we must
//check if 'x' is committed or not even if ROW_ID is not needed in the Operator pipeline.
if (needSyntheticRowId) {
- findRecordsWithInvalidTransactionIds(innerRecordIdColumnVector,
+ findRecordsWithInvalidWriteIds(innerRecordIdColumnVector,
vectorizedRowBatchBase.size, selectedBitSet);
} else {
/*since ROW_IDs are not needed we didn't create the ColumnVectors to hold them but we
* still have to check if the data being read is committed as far as current
* reader (transactions) is concerned. Since here we are reading 'original' schema file,
- * all rows in it have been created by the same txn, namely 'syntheticProps.syntheticTxnId'
+ * all rows in it have been created by the same txn, namely 'syntheticProps.syntheticWriteId'
*/
- if (!validWriteIdList.isWriteIdValid(syntheticProps.syntheticTxnId)) {
+ if (!validWriteIdList.isWriteIdValid(syntheticProps.syntheticWriteId)) {
selectedBitSet.clear(0, vectorizedRowBatchBase.size);
}
}
@@ -543,29 +543,29 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti
return innerRecordIdColumnVector;
}
- private void findRecordsWithInvalidTransactionIds(VectorizedRowBatch batch, BitSet selectedBitSet) {
- findRecordsWithInvalidTransactionIds(batch.cols, batch.size, selectedBitSet);
+ private void findRecordsWithInvalidWriteIds(VectorizedRowBatch batch, BitSet selectedBitSet) {
+ findRecordsWithInvalidWriteIds(batch.cols, batch.size, selectedBitSet);
}
- private void findRecordsWithInvalidTransactionIds(ColumnVector[] cols, int size, BitSet selectedBitSet) {
+ private void findRecordsWithInvalidWriteIds(ColumnVector[] cols, int size, BitSet selectedBitSet) {
if (cols[OrcRecordUpdater.CURRENT_WRITEID].isRepeating) {
// When we have repeating values, we can unset the whole bitset at once
- // if the repeating value is not a valid transaction.
- long currentTransactionIdForBatch = ((LongColumnVector)
+ // if the repeating value is not a valid write id.
+ long currentWriteIdForBatch = ((LongColumnVector)
cols[OrcRecordUpdater.CURRENT_WRITEID]).vector[0];
- if (!validWriteIdList.isWriteIdValid(currentTransactionIdForBatch)) {
+ if (!validWriteIdList.isWriteIdValid(currentWriteIdForBatch)) {
selectedBitSet.clear(0, size);
}
return;
}
- long[] currentTransactionVector =
+ long[] currentWriteIdVector =
((LongColumnVector) cols[OrcRecordUpdater.CURRENT_WRITEID]).vector;
// Loop through the bits that are set to true and mark those rows as false, if their
- // current transactions are not valid.
+ // current write ids are not valid.
for (int setBitIndex = selectedBitSet.nextSetBit(0);
setBitIndex >= 0;
setBitIndex = selectedBitSet.nextSetBit(setBitIndex+1)) {
- if (!validWriteIdList.isWriteIdValid(currentTransactionVector[setBitIndex])) {
+ if (!validWriteIdList.isWriteIdValid(currentWriteIdVector[setBitIndex])) {
selectedBitSet.clear(setBitIndex);
}
}
@@ -690,7 +690,7 @@ public void findDeletedRecords(ColumnVector[] cols, int size, BitSet selectedBit
return;
}
- long[] originalTransaction =
+ long[] originalWriteId =
cols[OrcRecordUpdater.ORIGINAL_WRITEID].isRepeating ? null
: ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_WRITEID]).vector;
long[] bucket =
@@ -701,7 +701,7 @@ public void findDeletedRecords(ColumnVector[] cols, int size, BitSet selectedBit
: ((LongColumnVector) cols[OrcRecordUpdater.ROW_ID]).vector;
// The following repeatedX values will be set, if any of the columns are repeating.
- long repeatedOriginalTransaction = (originalTransaction != null) ? -1
+ long repeatedOriginalWriteId = (originalWriteId != null) ? -1
: ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_WRITEID]).vector[0];
long repeatedBucket = (bucket != null) ? -1
: ((LongColumnVector) cols[OrcRecordUpdater.BUCKET]).vector[0];
@@ -716,7 +716,7 @@ public void findDeletedRecords(ColumnVector[] cols, int size, BitSet selectedBit
}
RecordIdentifier firstRecordIdInBatch =
new RecordIdentifier(
- originalTransaction != null ? originalTransaction[firstValidIndex] : repeatedOriginalTransaction,
+ originalWriteId != null ? originalWriteId[firstValidIndex] : repeatedOriginalWriteId,
bucket != null ? (int) bucket[firstValidIndex] : (int) repeatedBucket,
rowId != null ? (int) rowId[firstValidIndex] : repeatedRowId);
@@ -724,7 +724,7 @@ public void findDeletedRecords(ColumnVector[] cols, int size, BitSet selectedBit
int lastValidIndex = selectedBitSet.previousSetBit(size - 1);
RecordIdentifier lastRecordIdInBatch =
new RecordIdentifier(
- originalTransaction != null ? originalTransaction[lastValidIndex] : repeatedOriginalTransaction,
+ originalWriteId != null ? originalWriteId[lastValidIndex] : repeatedOriginalWriteId,
bucket != null ? (int) bucket[lastValidIndex] : (int) repeatedBucket,
rowId != null ? (int) rowId[lastValidIndex] : repeatedRowId);
@@ -743,7 +743,7 @@ public void findDeletedRecords(ColumnVector[] cols, int size, BitSet selectedBit
RecordIdentifier currRecordIdInBatch = new RecordIdentifier();
while (isDeleteRecordAvailable && currIndex != -1 && currIndex <= lastValidIndex) {
currRecordIdInBatch.setValues(
- (originalTransaction != null) ? originalTransaction[currIndex] : repeatedOriginalTransaction,
+ (originalWriteId != null) ? originalWriteId[currIndex] : repeatedOriginalWriteId,
(bucket != null) ? (int) bucket[currIndex] : (int) repeatedBucket,
(rowId != null) ? rowId[currIndex] : repeatedRowId);
@@ -780,34 +780,34 @@ public void close() throws IOException {
* An implementation for DeleteEventRegistry that optimizes for performance by loading
* all the delete events into memory at once from all the delete delta files.
* It starts by reading all the delete events through a regular sort merge logic
- * into 3 vectors- one for original transaction id (otid), one for bucket property and one for
+ * into 3 vectors- one for original Write id (owid), one for bucket property and one for
* row id. See {@link BucketCodec} for more about bucket property.
- * The otids are likely to be repeated very often, as a single transaction
- * often deletes thousands of rows. Hence, the otid vector is compressed to only store the
+ * The owids are likely to be repeated very often, as a single transaction
+ * often deletes thousands of rows. Hence, the owid vector is compressed to only store the
* toIndex and fromIndex ranges in the larger row id vector. Now, querying whether a
* record id is deleted or not, is done by performing a binary search on the
- * compressed otid range. If a match is found, then a binary search is then performed on
+ * compressed owid range. If a match is found, then a binary search is then performed on
* the larger rowId vector between the given toIndex and fromIndex. Of course, there is a rough
* heuristic that prevents creation of an instance of this class if the memory pressure is high.
* The SortMergedDeleteEventRegistry is then the fallback method for such scenarios.
*/
static class ColumnizedDeleteEventRegistry implements DeleteEventRegistry {
/**
- * A simple wrapper class to hold the (otid, bucketProperty, rowId) pair.
+ * A simple wrapper class to hold the (owid, bucketProperty, rowId) pair.
*/
static class DeleteRecordKey implements Comparable<DeleteRecordKey> {
- private long originalTransactionId;
+ private long originalWriteId;
/**
* see {@link BucketCodec}
*/
private int bucketProperty;
private long rowId;
DeleteRecordKey() {
- this.originalTransactionId = -1;
+ this.originalWriteId = -1;
this.rowId = -1;
}
- public void set(long otid, int bucketProperty, long rowId) {
- this.originalTransactionId = otid;
+ public void set(long owid, int bucketProperty, long rowId) {
+ this.originalWriteId = owid;
this.bucketProperty = bucketProperty;
this.rowId = rowId;
}
@@ -817,8 +817,8 @@ public int compareTo(DeleteRecordKey other) {
if (other == null) {
return -1;
}
- if (originalTransactionId != other.originalTransactionId) {
- return originalTransactionId < other.originalTransactionId ? -1 : 1;
+ if (originalWriteId != other.originalWriteId) {
+ return originalWriteId < other.originalWriteId ? -1 : 1;
}
if(bucketProperty != other.bucketProperty) {
return bucketProperty < other.bucketProperty ? -1 : 1;
@@ -830,7 +830,7 @@ public int compareTo(DeleteRecordKey other) {
}
@Override
public String toString() {
- return "otid: " + originalTransactionId + " bucketP:" + bucketProperty + " rowid: " + rowId;
+ return "owid: " + originalWriteId + " bucketP:" + bucketProperty + " rowid: " + rowId;
}
}
@@ -881,12 +881,12 @@ public boolean next(DeleteRecordKey deleteRecordKey) throws IOException {
return false; // no more batches to read, exhausted the reader.
}
}
- long currentTransaction = setCurrentDeleteKey(deleteRecordKey);
+ long currentWriteId = setCurrentDeleteKey(deleteRecordKey);
if(!isBucketPropertyRepeating) {
checkBucketId(deleteRecordKey.bucketProperty);
}
++indexPtrInBatch;
- if (validWriteIdList.isWriteIdValid(currentTransaction)) {
+ if (validWriteIdList.isWriteIdValid(currentWriteId)) {
isValidNext = true;
}
}
@@ -897,20 +897,20 @@ public void close() throws IOException {
this.recordReader.close();
}
private long setCurrentDeleteKey(DeleteRecordKey deleteRecordKey) {
- int originalTransactionIndex =
+ int originalWriteIdIndex =
batch.cols[OrcRecordUpdater.ORIGINAL_WRITEID].isRepeating ? 0 : indexPtrInBatch;
- long originalTransaction
- = ((LongColumnVector) batch.cols[OrcRecordUpdater.ORIGINAL_WRITEID]).vector[originalTransactionIndex];
+ long originalWriteId
+ = ((LongColumnVector) batch.cols[OrcRecordUpdater.ORIGINAL_WRITEID]).vector[originalWriteIdIndex];
int bucketPropertyIndex =
batch.cols[OrcRecordUpdater.BUCKET].isRepeating ? 0 : indexPtrInBatch;
int bucketProperty = (int)((LongColumnVector)batch.cols[OrcRecordUpdater.BUCKET]).vector[bucketPropertyIndex];
long rowId = ((LongColumnVector) batch.cols[OrcRecordUpdater.ROW_ID]).vector[indexPtrInBatch];
- int currentTransactionIndex
+ int currentWriteIdIndex
= batch.cols[OrcRecordUpdater.CURRENT_WRITEID].isRepeating ? 0 : indexPtrInBatch;
- long currentTransaction
- = ((LongColumnVector) batch.cols[OrcRecordUpdater.CURRENT_WRITEID]).vector[currentTransactionIndex];
- deleteRecordKey.set(originalTransaction, bucketProperty, rowId);
- return currentTransaction;
+ long currentWriteId
+ = ((LongColumnVector) batch.cols[OrcRecordUpdater.CURRENT_WRITEID]).vector[currentWriteIdIndex];
+ deleteRecordKey.set(originalWriteId, bucketProperty, rowId);
+ return currentWriteId;
}
private void checkBucketId() throws IOException {
isBucketPropertyRepeating = batch.cols[OrcRecordUpdater.BUCKET].isRepeating;
@@ -949,31 +949,31 @@ private void checkBucketId(int bucketPropertyFromRecord) throws IOException {
}
}
/**
- * A CompressedOtid class stores a compressed representation of the original
- * transaction ids (otids) read from the delete delta files. Since the record ids
- * are sorted by (otid, rowId) and otids are highly likely to be repetitive, it is
- * efficient to compress them as a CompressedOtid that stores the fromIndex and
+ * A CompressedOwid class stores a compressed representation of the original
+ * write ids (owids) read from the delete delta files. Since the record ids
+ * are sorted by (owid, rowId) and owids are highly likely to be repetitive, it is
+ * efficient to compress them as a CompressedOwid that stores the fromIndex and
* the toIndex. These fromIndex and toIndex reference the larger vector formed by
* concatenating the correspondingly ordered rowIds.
*/
- private final class CompressedOtid implements Comparable<CompressedOtid> {
- final long originalTransactionId;
+ private final class CompressedOwid implements Comparable<CompressedOwid> {
+ final long originalWriteId;
final int bucketProperty;
final int fromIndex; // inclusive
final int toIndex; // exclusive
- CompressedOtid(long otid, int bucketProperty, int fromIndex, int toIndex) {
- this.originalTransactionId = otid;
+ CompressedOwid(long owid, int bucketProperty, int fromIndex, int toIndex) {
+ this.originalWriteId = owid;
this.bucketProperty = bucketProperty;
this.fromIndex = fromIndex;
this.toIndex = toIndex;
}
@Override
- public int compareTo(CompressedOtid other) {
- // When comparing the CompressedOtid, the one with the lesser value is smaller.
- if (originalTransactionId != other.originalTransactionId) {
- return originalTransactionId < other.originalTransactionId ? -1 : 1;
+ public int compareTo(CompressedOwid other) {
+ // When comparing the CompressedOwid, the one with the lesser value is smaller.
+ if (originalWriteId != other.originalWriteId) {
+ return originalWriteId < other.originalWriteId ? -1 : 1;
}
if(bucketProperty != other.bucketProperty) {
return bucketProperty < other.bucketProperty ? -1 : 1;
@@ -988,14 +988,14 @@ public int compareTo(CompressedOtid other) {
* all delete deltas at once - possibly causing OOM same as for {@link SortMergedDeleteEventRegistry}
* which uses {@link OrcRawRecordMerger}. Why not load all delete_delta sequentially. Each
* dd is sorted by {@link RecordIdentifier} so we could create a BTree like structure where the
- * 1st level is an array of originalTransactionId where each entry points at an array
+ * 1st level is an array of originalWriteId where each entry points at an array
* of bucketIds where each entry points at an array of rowIds. We could probably use ArrayList
* to manage insertion as the structure is built (LinkedList?). This should reduce memory
* footprint (as far as OrcReader to a single reader) - probably bad for LLAP IO
*/
private TreeMap<DeleteRecordKey, DeleteReaderValue> sortMerger;
private long rowIds[];
- private CompressedOtid compressedOtids[];
+ private CompressedOwid compressedOwids[];
private ValidWriteIdList validWriteIdList;
private Boolean isEmpty = null;
@@ -1009,7 +1009,7 @@ public int compareTo(CompressedOtid other) {
+ " isFullAcidTable: " + AcidUtils.isFullAcidScan(conf));
this.sortMerger = new TreeMap<DeleteRecordKey, DeleteReaderValue>();
this.rowIds = null;
- this.compressedOtids = null;
+ this.compressedOwids = null;
int maxEventsInMemory = HiveConf.getIntVar(conf, ConfVars.HIVE_TRANSACTIONAL_NUM_EVENTS_IN_MEMORY);
final boolean isBucketedTable = conf.getInt(hive_metastoreConstants.BUCKET_COUNT, 0) > 0;
@@ -1064,7 +1064,7 @@ public int compareTo(CompressedOtid other) {
readAllDeleteEventsFromDeleteDeltas();
}
}
- isEmpty = compressedOtids == null || rowIds == null;
+ isEmpty = compressedOwids == null || rowIds == null;
} catch(IOException|DeleteEventsOverflowMemoryException e) {
close(); // close any open readers, if there was some exception during initialization.
throw e; // rethrow the exception so that the caller can handle.
@@ -1072,19 +1072,19 @@ public int compareTo(CompressedOtid other) {
}
/**
- * This is not done quite right. The intent of {@link CompressedOtid} is a hedge against
+ * This is not done quite right. The intent of {@link CompressedOwid} is a hedge against
* "delete from T" that generates a huge number of delete events possibly even 2G - max array
* size. (assuming no one txn inserts > 2G rows (in a bucket)). As implemented, the algorithm
- * first loads all data into one array otid[] and rowIds[] which defeats the purpose.
+ * first loads all data into one array owid[] and rowIds[] which defeats the purpose.
* In practice we should be filtering delete events by min/max ROW_ID from the split. The latter
* is also not yet implemented: HIVE-16812.
*/
private void readAllDeleteEventsFromDeleteDeltas() throws IOException {
if (sortMerger == null || sortMerger.isEmpty()) return; // trivial case, nothing to read.
- int distinctOtids = 0;
- long lastSeenOtid = -1;
+ int distinctOwids = 0;
+ long lastSeenOwid = -1;
int lastSeenBucketProperty = -1;
- long otids[] = new long[rowIds.length];
+ long owids[] = new long[rowIds.length];
int[] bucketProperties = new int [rowIds.length];
int index = 0;
@@ -1101,14 +1101,14 @@ private void readAllDeleteEventsFromDeleteDeltas() throws IOException {
Entry<DeleteRecordKey, DeleteReaderValue> entry = sortMerger.pollFirstEntry();
DeleteRecordKey deleteRecordKey = entry.getKey();
DeleteReaderValue deleteReaderValue = entry.getValue();
- otids[index] = deleteRecordKey.originalTransactionId;
+ owids[index] = deleteRecordKey.originalWriteId;
bucketProperties[index] = deleteRecordKey.bucketProperty;
rowIds[index] = deleteRecordKey.rowId;
++index;
- if (lastSeenOtid != deleteRecordKey.originalTransactionId ||
+ if (lastSeenOwid != deleteRecordKey.originalWriteId ||
lastSeenBucketProperty != deleteRecordKey.bucketProperty) {
- ++distinctOtids;
- lastSeenOtid = deleteRecordKey.originalTransactionId;
+ ++distinctOwids;
+ lastSeenOwid = deleteRecordKey.originalWriteId;
lastSeenBucketProperty = deleteRecordKey.bucketProperty;
}
if (deleteReaderValue.next(deleteRecordKey)) {
@@ -1118,49 +1118,49 @@ private void readAllDeleteEventsFromDeleteDeltas() throws IOException {
}
}
- // Once we have processed all the delete events and seen all the distinct otids,
- // we compress the otids into CompressedOtid data structure that records
- // the fromIndex(inclusive) and toIndex(exclusive) for each unique otid.
- this.compressedOtids = new CompressedOtid[distinctOtids];
- lastSeenOtid = otids[0];
+ // Once we have processed all the delete events and seen all the distinct owids,
+ // we compress the owids into CompressedOwid data structure that records
+ // the fromIndex(inclusive) and toIndex(exclusive) for each unique owid.
+ this.compressedOwids = new CompressedOwid[distinctOwids];
+ lastSeenOwid = owids[0];
lastSeenBucketProperty = bucketProperties[0];
int fromIndex = 0, pos = 0;
- for (int i = 1; i < otids.length; ++i) {
- if (otids[i] != lastSeenOtid || lastSeenBucketProperty != bucketProperties[i]) {
- compressedOtids[pos] =
- new CompressedOtid(lastSeenOtid, lastSeenBucketProperty, fromIndex, i);
- lastSeenOtid = otids[i];
+ for (int i = 1; i < owids.length; ++i) {
+ if (owids[i] != lastSeenOwid || lastSeenBucketProperty != bucketProperties[i]) {
+ compressedOwids[pos] =
+ new CompressedOwid(lastSeenOwid, lastSeenBucketProperty, fromIndex, i);
+ lastSeenOwid = owids[i];
lastSeenBucketProperty = bucketProperties[i];
fromIndex = i;
++pos;
}
}
- // account for the last distinct otid
- compressedOtids[pos] =
- new CompressedOtid(lastSeenOtid, lastSeenBucketProperty, fromIndex, otids.length);
+ // account for the last distinct owid
+ compressedOwids[pos] =
+ new CompressedOwid(lastSeenOwid, lastSeenBucketProperty, fromIndex, owids.length);
}
- private boolean isDeleted(long otid, int bucketProperty, long rowId) {
- if (compressedOtids == null || rowIds == null) {
+ private boolean isDeleted(long owid, int bucketProperty, long rowId) {
+ if (compressedOwids == null || rowIds == null) {
return false;
}
- // To find if a given (otid, rowId) pair is deleted or not, we perform
+ // To find if a given (owid, rowId) pair is deleted or not, we perform
// two binary searches at most. The first binary search is on the
- // compressed otids. If a match is found, only then we do the next
+ // compressed owids. If a match is found, only then we do the next
// binary search in the larger rowId vector between the given toIndex & fromIndex.
- // Check if otid is outside the range of all otids present.
- if (otid < compressedOtids[0].originalTransactionId
- || otid > compressedOtids[compressedOtids.length - 1].originalTransactionId) {
+ // Check if owid is outside the range of all owids present.
+ if (owid < compressedOwids[0].originalWriteId
+ || owid > compressedOwids[compressedOwids.length - 1].originalWriteId) {
return false;
}
- // Create a dummy key for searching the otid/bucket in the compressed otid ranges.
- CompressedOtid key = new CompressedOtid(otid, bucketProperty, -1, -1);
- int pos = Arrays.binarySearch(compressedOtids, key);
+ // Create a dummy key for searching the owid/bucket in the compressed owid ranges.
+ CompressedOwid key = new CompressedOwid(owid, bucketProperty, -1, -1);
+ int pos = Arrays.binarySearch(compressedOwids, key);
if (pos >= 0) {
- // Otid with the given value found! Searching now for rowId...
- key = compressedOtids[pos]; // Retrieve the actual CompressedOtid that matched.
- // Check if rowId is outside the range of all rowIds present for this otid.
+ // Owid with the given value found! Searching now for rowId...
+ key = compressedOwids[pos]; // Retrieve the actual CompressedOwid that matched.
+ // Check if rowId is outside the range of all rowIds present for this owid.
if (rowId < rowIds[key.fromIndex]
|| rowId > rowIds[key.toIndex - 1]) {
return false;
@@ -1181,16 +1181,16 @@ public boolean isEmpty() {
@Override
public void findDeletedRecords(ColumnVector[] cols, int size, BitSet selectedBitSet)
throws IOException {
- if (rowIds == null || compressedOtids == null) {
+ if (rowIds == null || compressedOwids == null) {
return;
}
- // Iterate through the batch and for each (otid, rowid) in the batch
+ // Iterate through the batch and for each (owid, rowid) in the batch
// check if it is deleted or not.
- long[] originalTransactionVector =
+ long[] originalWriteIdVector =
cols[OrcRecordUpdater.ORIGINAL_WRITEID].isRepeating ? null
: ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_WRITEID]).vector;
- long repeatedOriginalTransaction = (originalTransactionVector != null) ? -1
+ long repeatedOriginalWriteId = (originalWriteIdVector != null) ? -1
: ((LongColumnVector) cols[OrcRecordUpdater.ORIGINAL_WRITEID]).vector[0];
long[] bucketProperties =
@@ -1205,12 +1205,12 @@ public void findDeletedRecords(ColumnVector[] cols, int size, BitSet selectedBit
for (int setBitIndex = selectedBitSet.nextSetBit(0);
setBitIndex >= 0;
setBitIndex = selectedBitSet.nextSetBit(setBitIndex+1)) {
- long otid = originalTransactionVector != null ? originalTransactionVector[setBitIndex]
- : repeatedOriginalTransaction ;
+ long owid = originalWriteIdVector != null ? originalWriteIdVector[setBitIndex]
+ : repeatedOriginalWriteId ;
int bucketProperty = bucketProperties != null ? (int)bucketProperties[setBitIndex]
: repeatedBucketProperty;
long rowId = rowIdVector[setBitIndex];
- if (isDeleted(otid, bucketProperty, rowId)) {
+ if (isDeleted(owid, bucketProperty, rowId)) {
selectedBitSet.clear(setBitIndex);
}
}
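The lookup the comments above describe (a binary search over the compressed (originalWriteId, bucketProperty) ranges, then over the corresponding rowId slice) can be summarized in a standalone sketch; this is a simplification of the patch's ColumnizedDeleteEventRegistry.isDeleted, reusing the CompressedOwid shape shown above:

    import java.util.Arrays;

    // compressedOwids is sorted by (originalWriteId, bucketProperty); each entry covers
    // rowIds[fromIndex, toIndex), and each such slice of rowIds is itself sorted.
    static boolean isDeleted(long owid, int bucketProperty, long rowId,
                             CompressedOwid[] compressedOwids, long[] rowIds) {
      CompressedOwid probe = new CompressedOwid(owid, bucketProperty, -1, -1);
      int pos = Arrays.binarySearch(compressedOwids, probe);  // compares owid, then bucketProperty
      if (pos < 0) {
        return false;                                         // no delete events for this owid/bucket
      }
      CompressedOwid range = compressedOwids[pos];
      return Arrays.binarySearch(rowIds, range.fromIndex, range.toIndex, rowId) >= 0;
    }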
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index baa9070..fd8137d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -1726,17 +1726,17 @@ else if(!isAcidIUDoperation && isFullAcidTable) {
* Load Data commands for fullAcid tables write to base_x (if there is overwrite clause) or
* delta_x_x directory - same as any other Acid write. This method modifies the destPath to add
* this path component.
- * @param txnId - id of current transaction (in which this operation is running)
+ * @param writeId - write id of the operated table from current transaction (in which this operation is running)
* @param stmtId - see {@link DbTxnManager#getStmtIdAndIncrement()}
* @return appropriately modified path
*/
- private Path fixFullAcidPathForLoadData(LoadFileType loadFileType, Path destPath, long txnId, int stmtId, Table tbl) throws HiveException {
+ private Path fixFullAcidPathForLoadData(LoadFileType loadFileType, Path destPath, long writeId, int stmtId, Table tbl) throws HiveException {
switch (loadFileType) {
case REPLACE_ALL:
- destPath = new Path(destPath, AcidUtils.baseDir(txnId));
+ destPath = new Path(destPath, AcidUtils.baseDir(writeId));
break;
case KEEP_EXISTING:
- destPath = new Path(destPath, AcidUtils.deltaSubdir(txnId, txnId, stmtId));
+ destPath = new Path(destPath, AcidUtils.deltaSubdir(writeId, writeId, stmtId));
break;
case OVERWRITE_EXISTING:
//should not happen here - this is for replication
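With the zero-padded directory names visible in the tests below, the two branches above resolve to paths like these (writeId 7, stmtId 0 and the table location are chosen only for illustration):

    import org.apache.hadoop.fs.Path;

    Path destPath = new Path("/warehouse/t");                                   // hypothetical table dir
    Path replaceAll = new Path(destPath, AcidUtils.baseDir(7L));                // /warehouse/t/base_0000007
    Path keepExisting = new Path(destPath, AcidUtils.deltaSubdir(7L, 7L, 0));   // /warehouse/t/delta_0000007_0000007_0000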
@@ -1758,9 +1758,9 @@ private boolean areEventsForDmlNeeded(Table tbl, Partition oldPart) {
return conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && oldPart != null;
}
- private List<Path> listFilesCreatedByQuery(Path loadPath, long txnId, int stmtId) throws HiveException {
+ private List<Path> listFilesCreatedByQuery(Path loadPath, long writeId, int stmtId) throws HiveException {
List<Path> newFiles = new ArrayList<Path>();
- final String filePrefix = AcidUtils.deltaSubdir(txnId, txnId, stmtId);
+ final String filePrefix = AcidUtils.deltaSubdir(writeId, writeId, stmtId);
FileStatus[] srcs;
FileSystem srcFs;
try {
@@ -1926,7 +1926,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
* @throws HiveException
*/
private Set<Path> getValidPartitionsInPath(
- int numDP, int numLB, Path loadPath, Long txnId, int stmtId,
+ int numDP, int numLB, Path loadPath, Long writeId, int stmtId,
boolean isMmTable, boolean isInsertOverwrite) throws HiveException {
Set<Path> validPartitions = new HashSet<Path>();
try {
@@ -1950,7 +1950,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
Utilities.FILE_OP_LOGGER.trace(
"Looking for dynamic partitions in {} ({} levels)", loadPath, numDP);
Path[] leafStatus = Utilities.getMmDirectoryCandidates(
- fs, loadPath, numDP, numLB, null, txnId, -1, conf, isInsertOverwrite);
+ fs, loadPath, numDP, numLB, null, writeId, -1, conf, isInsertOverwrite);
for (Path p : leafStatus) {
Path dpPath = p.getParent(); // Skip the MM directory that we have found.
for (int i = 0; i < numLB; ++i) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
index 171825e..7a255f4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -119,7 +119,7 @@
/**
* A set of FileSinkOperators being written to in an ACID compliant way. We need to remember
- * them here because when we build them we don't yet know the transaction id. We need to go
+ * them here because when we build them we don't yet know the write id. We need to go
* back and set it once we actually start running the query.
* This also contains insert-only sinks.
*/
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
index 31da66a..4870a90 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
@@ -281,7 +281,7 @@ void run(HiveConf conf, String jobName, Table t, StorageDescriptor sd, ValidWrit
String minOpenInfo = ".";
if(writeIds.getMinOpenWriteId() != null) {
minOpenInfo = " with min Open " + JavaUtils.writeIdToString(writeIds.getMinOpenWriteId()) +
- ". Compaction cannot compact above this txnid";
+ ". Compaction cannot compact above this writeId";
}
LOG.error("No delta files or original files found to compact in " + sd.getLocation() +
" for compactionId=" + ci.id + minOpenInfo);
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
index b90f5b1..12d57c6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
@@ -92,11 +92,11 @@ public void testNonAcidInsert() throws Exception {
/**
* Useful for debugging. Dumps ORC file in JSON to CWD.
*/
- private void dumpBucketData(Table table, long txnId, int stmtId, int bucketNum) throws Exception {
+ private void dumpBucketData(Table table, long writeId, int stmtId, int bucketNum) throws Exception {
if(true) {
return;
}
- Path bucket = AcidUtils.createBucketFile(new Path(new Path(getWarehouseDir(), table.toString().toLowerCase()), AcidUtils.deltaSubdir(txnId, txnId, stmtId)), bucketNum);
+ Path bucket = AcidUtils.createBucketFile(new Path(new Path(getWarehouseDir(), table.toString().toLowerCase()), AcidUtils.deltaSubdir(writeId, writeId, stmtId)), bucketNum);
FileOutputStream delta = new FileOutputStream(testName.getMethodName() + "_" + bucket.getParent().getName() + "_" + bucket.getName());
// try {
// FileDump.printJsonData(conf, bucket.toString(), delta);
@@ -109,9 +109,9 @@ private void dumpBucketData(Table table, long txnId, int stmtId, int bucketNum)
/**
* Dump all data in the table by bucket in JSON format
*/
- private void dumpTableData(Table table, long txnId, int stmtId) throws Exception {
+ private void dumpTableData(Table table, long writeId, int stmtId) throws Exception {
for(int bucketNum = 0; bucketNum < BUCKET_COUNT; bucketNum++) {
- dumpBucketData(table, txnId, stmtId, bucketNum);
+ dumpBucketData(table, writeId, stmtId, bucketNum);
}
}
@Test
@@ -765,16 +765,16 @@ public void testNonAcidToAcidConversion01() throws Exception {
BucketCodec.V1.encode(new AcidOutputFormat.Options(hiveConf).bucket(1)));
Assert.assertEquals("", 4, rs.size());
Assert.assertTrue(rs.get(0),
- rs.get(0).startsWith("{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":0}\t0\t12"));
+ rs.get(0).startsWith("{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t0\t12"));
Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nonacidorctbl/000000_0_copy_1"));
Assert.assertTrue(rs.get(1),
- rs.get(1).startsWith("{\"transactionid\":0,\"bucketid\":536936448,\"rowid\":0}\t1\t2"));
+ rs.get(1).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":0}\t1\t2"));
Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nonacidorctbl/000001_0"));
Assert.assertTrue(rs.get(2),
- rs.get(2).startsWith("{\"transactionid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t5"));
+ rs.get(2).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t5"));
Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nonacidorctbl/000001_0_copy_1"));
Assert.assertTrue(rs.get(3),
- rs.get(3).startsWith("{\"transactionid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t17"));
+ rs.get(3).startsWith("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t17"));
Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nonacidorctbl/delta_0000001_0000001_0000/bucket_00001"));
//run Compaction
runStatementOnDriver("alter table "+ TestTxnCommands2.Table.NONACIDORCTBL +" compact 'major'");
@@ -786,16 +786,16 @@ public void testNonAcidToAcidConversion01() throws Exception {
}
Assert.assertEquals("", 4, rs.size());
Assert.assertTrue(rs.get(0),
- rs.get(0).startsWith("{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":0}\t0\t12"));
+ rs.get(0).startsWith("{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t0\t12"));
Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nonacidorctbl/base_0000001/bucket_00000"));
Assert.assertTrue(rs.get(1),
- rs.get(1).startsWith("{\"transactionid\":0,\"bucketid\":536936448,\"rowid\":0}\t1\t2"));
+ rs.get(1).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":0}\t1\t2"));
Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nonacidorctbl/base_0000001/bucket_00001"));
Assert.assertTrue(rs.get(2),
- rs.get(2).startsWith("{\"transactionid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t5"));
+ rs.get(2).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t5"));
Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nonacidorctbl/base_0000001/bucket_00001"));
Assert.assertTrue(rs.get(3),
- rs.get(3).startsWith("{\"transactionid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t17"));
+ rs.get(3).startsWith("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t17"));
Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nonacidorctbl/base_0000001/bucket_00001"));
//make sure they are the same before and after compaction
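For reference, the bucketid values asserted in these tests are BucketCodec.V1-encoded bucket properties (the test above builds one via BucketCodec.V1.encode(new AcidOutputFormat.Options(hiveConf).bucket(1))). Assuming V1 keeps the codec version in the top bits and the bucket id starting at bit 16, the constants can be reproduced with simple shifts; this arithmetic illustrates the assumed layout, it is not the codec's source:

    // Assumed V1 layout: version 1 in the high bits, bucket id at bits 16..27, statement id low.
    static int encodeV1(int bucketId, int statementId) {
      return (1 << 29) | (bucketId << 16) | statementId;
    }
    // encodeV1(0, 0) == 536870912  -> the "bucketid":536870912 seen for bucket 0
    // encodeV1(1, 0) == 536936448  -> the "bucketid":536936448 seen for bucket 1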
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
index 2eead9e..b832f71 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
@@ -366,15 +366,15 @@ public void testNonAcidToAcidConversion02() throws Exception {
* Note: order of rows in a file ends up being the reverse of order in values clause (why?!)
*/
String[][] expected = {
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":0}\t0\t13", "bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t15", "bucket_00000"},
- {"{\"transactionid\":3,\"bucketid\":536870912,\"rowid\":0}\t0\t17", "bucket_00000"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t120", "bucket_00000"},
- {"{\"transactionid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t2", "bucket_00001"},
- {"{\"transactionid\":0,\"bucketid\":536936448,\"rowid\":3}\t1\t4", "bucket_00001"},
- {"{\"transactionid\":0,\"bucketid\":536936448,\"rowid\":2}\t1\t5", "bucket_00001"},
- {"{\"transactionid\":0,\"bucketid\":536936448,\"rowid\":4}\t1\t6", "bucket_00001"},
- {"{\"transactionid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t16", "bucket_00001"}
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t0\t13", "bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t15", "bucket_00000"},
+ {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t0\t17", "bucket_00000"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t120", "bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t2", "bucket_00001"},
+ {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":3}\t1\t4", "bucket_00001"},
+ {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":2}\t1\t5", "bucket_00001"},
+ {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":4}\t1\t6", "bucket_00001"},
+ {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t16", "bucket_00001"}
};
Assert.assertEquals("Unexpected row count before compaction", expected.length, rs.size());
for(int i = 0; i < expected.length; i++) {
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java
index 0a305a4..8a01de3 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java
@@ -105,13 +105,13 @@ private void loadDataUpdate(boolean isVectorized) throws Exception {
String testQuery = isVectorized ? "select ROW__ID, a, b from T order by ROW__ID" :
"select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID";
String[][] expected = new String[][]{
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/000000_0"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/000000_0"}};
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/000000_0"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/000000_0"}};
checkResult(expected, testQuery, isVectorized, "load data inpath");
runStatementOnDriver("update T set b = 17 where a = 1");
String[][] expected2 = new String[][]{
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/000000_0"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "t/delta_0000002_0000002_0000/bucket_00000"}
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/000000_0"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "t/delta_0000002_0000002_0000/bucket_00000"}
};
checkResult(expected2, testQuery, isVectorized, "update");
@@ -121,15 +121,15 @@ private void loadDataUpdate(boolean isVectorized) throws Exception {
runStatementOnDriver("alter table T compact 'minor'");
TestTxnCommands2.runWorker(hiveConf);
String[][] expected3 = new String[][] {
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "t/delta_0000001_0000004/bucket_00000"},
- {"{\"transactionid\":3,\"bucketid\":536870912,\"rowid\":0}\t2\t2", "t/delta_0000001_0000004/bucket_00000"}
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "t/delta_0000001_0000004/bucket_00000"},
+ {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t2\t2", "t/delta_0000001_0000004/bucket_00000"}
};
checkResult(expected3, testQuery, isVectorized, "delete compact minor");
runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' overwrite into table T");
String[][] expected4 = new String[][]{
- {"{\"transactionid\":5,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/base_0000005/000000_0"},
- {"{\"transactionid\":5,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/base_0000005/000000_0"}};
+ {"{\"writeid\":5,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/base_0000005/000000_0"},
+ {"{\"writeid\":5,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/base_0000005/000000_0"}};
checkResult(expected4, testQuery, isVectorized, "load data inpath overwrite");
//load same data again (additive)
@@ -138,9 +138,9 @@ private void loadDataUpdate(boolean isVectorized) throws Exception {
runStatementOnDriver("delete from T where a = 3");//matches 2 rows
runStatementOnDriver("insert into T values(2,2)");
String[][] expected5 = new String[][]{
- {"{\"transactionid\":7,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "t/delta_0000007_0000007_0000/bucket_00000"},
- {"{\"transactionid\":7,\"bucketid\":536870912,\"rowid\":1}\t1\t17", "t/delta_0000007_0000007_0000/bucket_00000"},
- {"{\"transactionid\":9,\"bucketid\":536870912,\"rowid\":0}\t2\t2", "t/delta_0000009_0000009_0000/bucket_00000"}
+ {"{\"writeid\":7,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "t/delta_0000007_0000007_0000/bucket_00000"},
+ {"{\"writeid\":7,\"bucketid\":536870912,\"rowid\":1}\t1\t17", "t/delta_0000007_0000007_0000/bucket_00000"},
+ {"{\"writeid\":9,\"bucketid\":536870912,\"rowid\":0}\t2\t2", "t/delta_0000009_0000009_0000/bucket_00000"}
};
checkResult(expected5, testQuery, isVectorized, "load data inpath overwrite update");
@@ -148,9 +148,9 @@ private void loadDataUpdate(boolean isVectorized) throws Exception {
runStatementOnDriver("alter table T compact 'major'");
TestTxnCommands2.runWorker(hiveConf);
String[][] expected6 = new String[][]{
- {"{\"transactionid\":7,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "t/base_0000009/bucket_00000"},
- {"{\"transactionid\":7,\"bucketid\":536870912,\"rowid\":1}\t1\t17", "t/base_0000009/bucket_00000"},
- {"{\"transactionid\":9,\"bucketid\":536870912,\"rowid\":0}\t2\t2", "t/base_0000009/bucket_00000"}
+ {"{\"writeid\":7,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "t/base_0000009/bucket_00000"},
+ {"{\"writeid\":7,\"bucketid\":536870912,\"rowid\":1}\t1\t17", "t/base_0000009/bucket_00000"},
+ {"{\"writeid\":9,\"bucketid\":536870912,\"rowid\":0}\t2\t2", "t/base_0000009/bucket_00000"}
};
checkResult(expected6, testQuery, isVectorized, "load data inpath compact major");
}
@@ -174,21 +174,21 @@ private void loadData(boolean isVectorized) throws Exception {
"select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID";
String[][] expected = new String[][] {
//normal insert
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t2", "t/delta_0000001_0000001_0000/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t0\t4", "t/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t2", "t/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t0\t4", "t/delta_0000001_0000001_0000/bucket_00000"},
//Load Data
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000002_0000002_0000/000000_0"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000002_0000002_0000/000000_0"}};
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000002_0000002_0000/000000_0"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000002_0000002_0000/000000_0"}};
checkResult(expected, testQuery, isVectorized, "load data inpath");
//test minor compaction
runStatementOnDriver("alter table T compact 'minor'");
TestTxnCommands2.runWorker(hiveConf);
String[][] expected1 = new String[][] {
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t2", "t/delta_0000001_0000002/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t0\t4", "t/delta_0000001_0000002/bucket_00000"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000002/bucket_00000"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000002/bucket_00000"}
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t2", "t/delta_0000001_0000002/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t0\t4", "t/delta_0000001_0000002/bucket_00000"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000002/bucket_00000"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000002/bucket_00000"}
};
checkResult(expected1, testQuery, isVectorized, "load data inpath (minor)");
@@ -197,11 +197,11 @@ private void loadData(boolean isVectorized) throws Exception {
runStatementOnDriver("alter table T compact 'major'");
TestTxnCommands2.runWorker(hiveConf);
String[][] expected2 = new String[][] {
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t2", "t/base_0000003/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t0\t4", "t/base_0000003/bucket_00000"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/base_0000003/bucket_00000"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/base_0000003/bucket_00000"},
- {"{\"transactionid\":3,\"bucketid\":536870912,\"rowid\":0}\t2\t2", "t/base_0000003/bucket_00000"}
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t2", "t/base_0000003/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t0\t4", "t/base_0000003/bucket_00000"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/base_0000003/bucket_00000"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/base_0000003/bucket_00000"},
+ {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t2\t2", "t/base_0000003/bucket_00000"}
};
checkResult(expected2, testQuery, isVectorized, "load data inpath (major)");
@@ -210,8 +210,8 @@ private void loadData(boolean isVectorized) throws Exception {
runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/2'");
runStatementOnDriver("load data inpath '" + getWarehouseDir() + "/2/data' overwrite into table T");
String[][] expected3 = new String[][] {
- {"{\"transactionid\":4,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000004/000000_0"},
- {"{\"transactionid\":4,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000004/000000_0"}};
+ {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000004/000000_0"},
+ {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000004/000000_0"}};
checkResult(expected3, testQuery, isVectorized, "load data inpath overwrite");
//one more major compaction
@@ -219,9 +219,9 @@ private void loadData(boolean isVectorized) throws Exception {
runStatementOnDriver("alter table T compact 'major'");
TestTxnCommands2.runWorker(hiveConf);
String[][] expected4 = new String[][] {
- {"{\"transactionid\":4,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000005/bucket_00000"},
- {"{\"transactionid\":4,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000005/bucket_00000"},
- {"{\"transactionid\":5,\"bucketid\":536870912,\"rowid\":0}\t6\t6", "t/base_0000005/bucket_00000"}};
+ {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000005/bucket_00000"},
+ {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000005/bucket_00000"},
+ {"{\"writeid\":5,\"bucketid\":536870912,\"rowid\":0}\t6\t6", "t/base_0000005/bucket_00000"}};
checkResult(expected4, testQuery, isVectorized, "load data inpath overwrite (major)");
}
/**
@@ -252,22 +252,22 @@ private void loadDataNonAcid2AcidConversion(boolean isVectorized) throws Excepti
String testQuery = isVectorized ? "select ROW__ID, a, b from T order by ROW__ID" :
"select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID";
/*
-{"transactionid":0,"bucketid":536870912,"rowid":0} 0 2/000000_0
-{"transactionid":0,"bucketid":536870912,"rowid":1} 0 4/000000_0
-{"transactionid":1,"bucketid":536870912,"rowid":0} 4 4/delta_0000001_0000001_0000/000000_0
-{"transactionid":1,"bucketid":536870912,"rowid":1} 5 5/delta_0000001_0000001_0000/000000_0
+{"writeid":0,"bucketid":536870912,"rowid":0} 0 2/000000_0
+{"writeid":0,"bucketid":536870912,"rowid":1} 0 4/000000_0
+{"writeid":1,"bucketid":536870912,"rowid":0} 4 4/delta_0000001_0000001_0000/000000_0
+{"writeid":1,"bucketid":536870912,"rowid":1} 5 5/delta_0000001_0000001_0000/000000_0
*/
String[][] expected = new String[][] {
//from pre-acid insert
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":0}\t0\t2", "t/000000_0"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":1}\t0\t4", "t/000000_0"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t0\t2", "t/000000_0"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t0\t4", "t/000000_0"},
//from Load Data into acid converted table
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/000000_0"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/000000_0"},
- {"{\"transactionid\":1,\"bucketid\":536936448,\"rowid\":0}\t2\t2", "t/delta_0000001_0000001_0000/000001_0"},
- {"{\"transactionid\":1,\"bucketid\":536936448,\"rowid\":1}\t3\t3", "t/delta_0000001_0000001_0000/000001_0"},
- {"{\"transactionid\":1,\"bucketid\":537001984,\"rowid\":0}\t4\t4", "t/delta_0000001_0000001_0000/000002_0"},
- {"{\"transactionid\":1,\"bucketid\":537001984,\"rowid\":1}\t5\t5", "t/delta_0000001_0000001_0000/000002_0"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/000000_0"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/000000_0"},
+ {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t2\t2", "t/delta_0000001_0000001_0000/000001_0"},
+ {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t3\t3", "t/delta_0000001_0000001_0000/000001_0"},
+ {"{\"writeid\":1,\"bucketid\":537001984,\"rowid\":0}\t4\t4", "t/delta_0000001_0000001_0000/000002_0"},
+ {"{\"writeid\":1,\"bucketid\":537001984,\"rowid\":1}\t5\t5", "t/delta_0000001_0000001_0000/000002_0"},
};
checkResult(expected, testQuery, isVectorized, "load data inpath");
@@ -278,9 +278,9 @@ private void loadDataNonAcid2AcidConversion(boolean isVectorized) throws Excepti
runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/2/data' overwrite into table T");
String[][] expected2 = new String[][] {
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000002/000000_0"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000002/000000_0"},
- {"{\"transactionid\":2,\"bucketid\":536936448,\"rowid\":0}\t8\t8", "t/base_0000002/000001_0"}
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000002/000000_0"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000002/000000_0"},
+ {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t8\t8", "t/base_0000002/000001_0"}
};
checkResult(expected2, testQuery, isVectorized, "load data inpath overwrite");
@@ -290,10 +290,10 @@ private void loadDataNonAcid2AcidConversion(boolean isVectorized) throws Excepti
TestTxnCommands2.runWorker(hiveConf);
String[][] expected3 = new String[][] {
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000003/bucket_00000"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000003/bucket_00000"},
- {"{\"transactionid\":2,\"bucketid\":536936448,\"rowid\":0}\t8\t8", "t/base_0000003/bucket_00001"},
- {"{\"transactionid\":3,\"bucketid\":536870912,\"rowid\":0}\t9\t9", "t/base_0000003/bucket_00000"}
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000003/bucket_00000"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000003/bucket_00000"},
+ {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t8\t8", "t/base_0000003/bucket_00001"},
+ {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t9\t9", "t/base_0000003/bucket_00000"}
};
checkResult(expected3, testQuery, isVectorized, "load data inpath overwrite (major)");
}
@@ -324,12 +324,12 @@ public void loadDataPartitioned() throws Exception {
List rs = runStatementOnDriver("select ROW__ID, p, a, b, INPUT__FILE__NAME from T order by p, ROW__ID");
String[][] expected = new String[][] {
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2", "t/p=0/delta_0000001_0000001_0000/000000_0"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t0\t0\t4", "t/p=0/delta_0000001_0000001_0000/000000_0"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t2", "t/p=1/delta_0000002_0000002_0000/000000_0"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4", "t/p=1/delta_0000002_0000002_0000/000000_0"},
- {"{\"transactionid\":3,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t2", "t/p=1/delta_0000003_0000003_0000/000000_0"},
- {"{\"transactionid\":3,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4", "t/p=1/delta_0000003_0000003_0000/000000_0"}};
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2", "t/p=0/delta_0000001_0000001_0000/000000_0"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t0\t0\t4", "t/p=0/delta_0000001_0000001_0000/000000_0"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t2", "t/p=1/delta_0000002_0000002_0000/000000_0"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4", "t/p=1/delta_0000002_0000002_0000/000000_0"},
+ {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t2", "t/p=1/delta_0000003_0000003_0000/000000_0"},
+ {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4", "t/p=1/delta_0000003_0000003_0000/000000_0"}};
checkExpected(rs, expected, "load data inpath partitioned");
@@ -338,10 +338,10 @@ public void loadDataPartitioned() throws Exception {
runStatementOnDriver("truncate table Tstage");
runStatementOnDriver("load data inpath '" + getWarehouseDir() + "/4/data' overwrite into table T partition(p=1)");
String[][] expected2 = new String[][] {
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2", "t/p=0/delta_0000001_0000001_0000/000000_0"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t0\t0\t4", "t/p=0/delta_0000001_0000001_0000/000000_0"},
- {"{\"transactionid\":4,\"bucketid\":536870912,\"rowid\":0}\t1\t5\t2", "t/p=1/base_0000004/000000_0"},
- {"{\"transactionid\":4,\"bucketid\":536870912,\"rowid\":1}\t1\t5\t4", "t/p=1/base_0000004/000000_0"}};
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2", "t/p=0/delta_0000001_0000001_0000/000000_0"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t0\t0\t4", "t/p=0/delta_0000001_0000001_0000/000000_0"},
+ {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t1\t5\t2", "t/p=1/base_0000004/000000_0"},
+ {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\t1\t5\t4", "t/p=1/base_0000004/000000_0"}};
rs = runStatementOnDriver("select ROW__ID, p, a, b, INPUT__FILE__NAME from T order by p, ROW__ID");
checkExpected(rs, expected2, "load data inpath partitioned overwrite");
}
@@ -403,20 +403,20 @@ private void testMultiStatement(boolean isVectorized) throws Exception {
String testQuery = isVectorized ? "select ROW__ID, a, b from T order by ROW__ID" :
"select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID";
String[][] expected = new String[][] {
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870913,\"rowid\":0}\t5\t5", "t/delta_0000001_0000001_0001/000000_0"},
- {"{\"transactionid\":1,\"bucketid\":536870913,\"rowid\":1}\t6\t6", "t/delta_0000001_0000001_0001/000000_0"}
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870913,\"rowid\":0}\t5\t5", "t/delta_0000001_0000001_0001/000000_0"},
+ {"{\"writeid\":1,\"bucketid\":536870913,\"rowid\":1}\t6\t6", "t/delta_0000001_0000001_0001/000000_0"}
};
checkResult(expected, testQuery, isVectorized, "load data inpath");
runStatementOnDriver("alter table T compact 'major'");
TestTxnCommands2.runWorker(hiveConf);
String[][] expected2 = new String[][] {
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/base_0000001/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/base_0000001/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870913,\"rowid\":0}\t5\t5", "t/base_0000001/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870913,\"rowid\":1}\t6\t6", "t/base_0000001/bucket_00000"}
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/base_0000001/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/base_0000001/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870913,\"rowid\":0}\t5\t5", "t/base_0000001/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870913,\"rowid\":1}\t6\t6", "t/base_0000001/bucket_00000"}
};
checkResult(expected2, testQuery, isVectorized, "load data inpath (major)");
//at least for now, Load Data w/Overwrite is not allowed in a txn: HIVE-18154
@@ -442,8 +442,8 @@ public void testAbort() throws Exception {
String testQuery = isVectorized ? "select ROW__ID, a, b from T order by ROW__ID" :
"select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID";
String[][] expected = new String[][] {
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/bucket_00000"}
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/bucket_00000"}
};
checkResult(expected, testQuery, isVectorized, "load data inpath");
}
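(Reader aid, not part of the patch: the INPUT__FILE__NAME values asserted above follow the delta/base directory naming that now encodes write ids. This is only an illustration of the zero-padded pattern, not Hive's own formatting code, which lives in AcidUtils.)

    // delta_<minWriteId>_<maxWriteId>_<statementId> and base_<writeId>, zero-padded
    String delta = String.format("delta_%07d_%07d_%04d", 2, 2, 0); // "delta_0000002_0000002_0000"
    String base  = String.format("base_%07d", 5);                  // "base_0000005"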
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
index 38358f2..a4df509 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
@@ -89,13 +89,13 @@ public void testNoBuckets() throws Exception {
/**the insert creates 2 output files (presumably because there are 2 input files)
* The number in the file name is writerId. This is the number encoded in ROW__ID.bucketId -
* see {@link org.apache.hadoop.hive.ql.io.BucketCodec}*/
- Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t0\t"));
+ Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t0\t"));
Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nobuckets/delta_0000001_0000001_0000/bucket_00000"));
- Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3\t"));
+ Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3\t"));
Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nobuckets/delta_0000001_0000001_0000/bucket_00000"));
- Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"transactionid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t1\t1\t"));
+ Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t1\t1\t"));
Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nobuckets/delta_0000001_0000001_0000/bucket_00001"));
- Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"transactionid\":1,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2\t"));
+ Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2\t"));
Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nobuckets/delta_0000001_0000001_0000/bucket_00001"));
runStatementOnDriver("update nobuckets set c3 = 17 where c3 in(0,1)");
@@ -104,14 +104,14 @@ public void testNoBuckets() throws Exception {
for(String s : rs) {
LOG.warn(s);
}
- Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3\t"));
+ Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3\t"));
Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nobuckets/delta_0000001_0000001_0000/bucket_00000"));
- Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"transactionid\":1,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2\t"));
+ Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2\t"));
Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nobuckets/delta_0000001_0000001_0000/bucket_00001"));
//so update has 1 writer which creates bucket0 where both new rows land
- Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t17\t"));
+ Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t17\t"));
Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nobuckets/delta_0000002_0000002_0000/bucket_00000"));
- Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t17\t"));
+ Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t17\t"));
Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nobuckets/delta_0000002_0000002_0000/bucket_00000"));
Set<String> expectedFiles = new HashSet<>();
@@ -144,13 +144,13 @@ public void testNoBuckets() throws Exception {
└── delta_0000002_0000002_0000
└── bucket_00000
*/
- Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3\t"));
+ Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3\t"));
Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nobuckets/base_0000002/bucket_00000"));
- Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t17\t"));
+ Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t17\t"));
Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nobuckets/base_0000002/bucket_00000"));
- Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t17\t"));
+ Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t17\t"));
Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nobuckets/base_0000002/bucket_00000"));
- Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"transactionid\":1,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2\t"));
+ Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2\t"));
Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nobuckets/base_0000002/bucket_00001"));
expectedFiles.clear();
@@ -185,8 +185,8 @@ public void testCTAS() throws Exception {
"'='true', 'transactional_properties'='default') as select a, b from " + Table.NONACIDORCTBL);
List rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from myctas order by ROW__ID");
String expected[][] = {
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t3\t4", "warehouse/myctas/delta_0000001_0000001_0000/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2", "warehouse/myctas/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t3\t4", "warehouse/myctas/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2", "warehouse/myctas/delta_0000001_0000001_0000/bucket_00000"},
};
checkExpected(rs, expected, "Unexpected row count after ctas from non acid table");
@@ -195,8 +195,8 @@ public void testCTAS() throws Exception {
"'='true', 'transactional_properties'='default') as select a, b from " + Table.ACIDTBL);//todo: try this with acid default - it seem makeing table acid in listener is too late
rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from myctas2 order by ROW__ID");
String expected2[][] = {
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t3\t4", "warehouse/myctas2/delta_0000001_0000001_0000/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2", "warehouse/myctas2/delta_0000001_0000001_0000/bucket_00000"}
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t3\t4", "warehouse/myctas2/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2", "warehouse/myctas2/delta_0000001_0000001_0000/bucket_00000"}
};
checkExpected(rs, expected2, "Unexpected row count after ctas from acid table");
@@ -205,10 +205,10 @@ public void testCTAS() throws Exception {
" union all select a, b from " + Table.ACIDTBL);
rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from myctas3 order by ROW__ID");
String expected3[][] = {
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t3\t4", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536936448,\"rowid\":0}\t3\t4", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00001"},
- {"{\"transactionid\":1,\"bucketid\":536936448,\"rowid\":1}\t1\t2", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00001"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t3\t4", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t3\t4", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00001"},
+ {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t1\t2", "warehouse/myctas3/delta_0000001_0000001_0000/bucket_00001"},
};
checkExpected(rs, expected3, "Unexpected row count after ctas from union all query");
@@ -217,8 +217,8 @@ public void testCTAS() throws Exception {
" union distinct select a, b from " + Table.ACIDTBL);
rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from myctas4 order by ROW__ID");
String expected4[][] = {
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "/delta_0000001_0000001_0000/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "/delta_0000001_0000001_0000/bucket_00000"},
};
checkExpected(rs, expected4, "Unexpected row count after ctas from union distinct query");
}
@@ -268,11 +268,11 @@ public void testInsertToAcidWithUnionRemove() throws Exception {
List rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID");
String expected[][] = {
- {"{\"transactionid\":1,\"bucketid\":536870913,\"rowid\":0}\t1\t2", "/delta_0000001_0000001_0001/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870913,\"rowid\":1}\t3\t4", "/delta_0000001_0000001_0001/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870914,\"rowid\":0}\t7\t8", "/delta_0000001_0000001_0002/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870914,\"rowid\":1}\t5\t6", "/delta_0000001_0000001_0002/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870915,\"rowid\":0}\t9\t10", "/delta_0000001_0000001_0003/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870913,\"rowid\":0}\t1\t2", "/delta_0000001_0000001_0001/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870913,\"rowid\":1}\t3\t4", "/delta_0000001_0000001_0001/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870914,\"rowid\":0}\t7\t8", "/delta_0000001_0000001_0002/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870914,\"rowid\":1}\t5\t6", "/delta_0000001_0000001_0002/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870915,\"rowid\":0}\t9\t10", "/delta_0000001_0000001_0003/bucket_00000"},
};
checkExpected(rs, expected, "Unexpected row count after ctas");
}
@@ -347,16 +347,16 @@ public void testToAcidConversionMultiBucket() throws Exception {
logical bucket (tranche)
*/
String expected2[][] = {
- {"{\"transactionid\":0,\"bucketid\":537001984,\"rowid\":0}\t1\t2", "warehouse/t/000002_0"},
- {"{\"transactionid\":0,\"bucketid\":537001984,\"rowid\":1}\t2\t4", "warehouse/t/000002_0"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":1}\t5\t6", "warehouse/t/000000_0"},
- {"{\"transactionid\":0,\"bucketid\":536936448,\"rowid\":0}\t6\t8", "warehouse/t/000001_0"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":0}\t9\t10", "warehouse/t/000000_0"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":3}\t10\t20", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":2}\t12\t12", "warehouse/t/000000_0_copy_1"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60", "warehouse/t/HIVE_UNION_SUBDIR_16/000000_0"},
- {"{\"transactionid\":0,\"bucketid\":536936448,\"rowid\":1}\t60\t80", "warehouse/t/HIVE_UNION_SUBDIR_16/000001_0"},
+ {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":0}\t1\t2", "warehouse/t/000002_0"},
+ {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":1}\t2\t4", "warehouse/t/000002_0"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t5\t6", "warehouse/t/000000_0"},
+ {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":0}\t6\t8", "warehouse/t/000001_0"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t9\t10", "warehouse/t/000000_0"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t10\t20", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t12\t12", "warehouse/t/000000_0_copy_1"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60", "warehouse/t/HIVE_UNION_SUBDIR_16/000000_0"},
+ {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t60\t80", "warehouse/t/HIVE_UNION_SUBDIR_16/000001_0"},
};
checkExpected(rs, expected2,"after converting to acid (no compaction)");
Assert.assertEquals(0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912));
@@ -368,15 +368,15 @@ logical bucket (tranche)
assertVectorized(shouldVectorize(), "delete from T where b = 8");
runStatementOnDriver("delete from T where b = 8");
String expected3[][] = {
- {"{\"transactionid\":0,\"bucketid\":537001984,\"rowid\":0}\t1\t2", "warehouse/t/000002_0"},
- {"{\"transactionid\":0,\"bucketid\":537001984,\"rowid\":1}\t2\t4", "warehouse/t/000002_0"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":1}\t5\t6", "warehouse/t/000000_0"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":0}\t9\t10", "warehouse/t/000000_0"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":3}\t10\t20", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":2}\t12\t12", "warehouse/t/000000_0_copy_1"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60", "warehouse/t/HIVE_UNION_SUBDIR_16/000000_0"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t60\t88", "warehouse/t/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":0}\t1\t2", "warehouse/t/000002_0"},
+ {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":1}\t2\t4", "warehouse/t/000002_0"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t5\t6", "warehouse/t/000000_0"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t9\t10", "warehouse/t/000000_0"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t10\t20", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t12\t12", "warehouse/t/000000_0_copy_1"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60", "warehouse/t/HIVE_UNION_SUBDIR_16/000000_0"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t60\t88", "warehouse/t/delta_0000001_0000001_0000/bucket_00000"},
};
rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from T order by a, b, INPUT__FILE__NAME");
checkExpected(rs, expected3,"after converting to acid (no compaction with updates)");
@@ -388,15 +388,15 @@ logical bucket (tranche)
/*Compaction preserves location of rows wrt buckets/tranches (for now)*/
String expected4[][] = {
- {"{\"transactionid\":0,\"bucketid\":537001984,\"rowid\":0}\t1\t2", "warehouse/t/base_0000002/bucket_00002"},
- {"{\"transactionid\":0,\"bucketid\":537001984,\"rowid\":1}\t2\t4", "warehouse/t/base_0000002/bucket_00002"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":1}\t5\t6", "warehouse/t/base_0000002/bucket_00000"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":0}\t9\t10", "warehouse/t/base_0000002/bucket_00000"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":3}\t10\t20", "warehouse/t/base_0000002/bucket_00000"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":2}\t12\t12", "warehouse/t/base_0000002/bucket_00000"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", "warehouse/t/base_0000002/bucket_00000"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60", "warehouse/t/base_0000002/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t60\t88", "warehouse/t/base_0000002/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":0}\t1\t2", "warehouse/t/base_0000002/bucket_00002"},
+ {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":1}\t2\t4", "warehouse/t/base_0000002/bucket_00002"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t5\t6", "warehouse/t/base_0000002/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t9\t10", "warehouse/t/base_0000002/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t10\t20", "warehouse/t/base_0000002/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t12\t12", "warehouse/t/base_0000002/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", "warehouse/t/base_0000002/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60", "warehouse/t/base_0000002/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t60\t88", "warehouse/t/base_0000002/bucket_00000"},
};
checkExpected(rs, expected4,"after major compact");
}
@@ -468,15 +468,15 @@ public void testToAcidConversion02() throws Exception {
* Also check the file name (only) after compaction for completeness
*/
String[][] expected = {
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":3}\t0\t13", "bucket_00000", "000000_0_copy_1"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t15", "bucket_00000", "bucket_00000"},
- {"{\"transactionid\":3,\"bucketid\":536870912,\"rowid\":0}\t0\t17", "bucket_00000", "bucket_00000"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t120", "bucket_00000", "bucket_00000"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "bucket_00000", "000000_0"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":4}\t1\t4", "bucket_00000", "000000_0_copy_1"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":5}\t1\t5", "bucket_00000", "000000_0_copy_1"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":6}\t1\t6", "bucket_00000", "000000_0_copy_2"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t16", "bucket_00000", "bucket_00000"}
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t0\t13", "bucket_00000", "000000_0_copy_1"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t15", "bucket_00000", "bucket_00000"},
+ {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t0\t17", "bucket_00000", "bucket_00000"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t120", "bucket_00000", "bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "bucket_00000", "000000_0"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t1\t4", "bucket_00000", "000000_0_copy_1"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t1\t5", "bucket_00000", "000000_0_copy_1"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":6}\t1\t6", "bucket_00000", "000000_0_copy_2"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t16", "bucket_00000", "bucket_00000"}
};
Assert.assertEquals("Unexpected row count before compaction", expected.length, rs.size());
for(int i = 0; i < expected.length; i++) {
@@ -569,8 +569,8 @@ public void testNonAcidToAcidVectorzied() throws Exception {
query = "select ROW__ID, a from T where b > 6 order by a";
rs = runStatementOnDriver(query);
String[][] expected1 = {
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":3}", "6"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":4}", "9"}
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}", "6"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "9"}
};
checkExpected(rs, expected1, "After conversion with VC1");
assertVectorized(shouldVectorize(), query);
@@ -579,11 +579,11 @@ public void testNonAcidToAcidVectorzied() throws Exception {
query = "select ROW__ID, a from T where b > 0 order by a";
rs = runStatementOnDriver(query);
String[][] expected2 = {
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":0}", "1"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":1}", "2"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":2}", "5"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":3}", "6"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":4}", "9"}
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}", "1"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}", "2"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}", "5"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}", "6"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}", "9"}
};
checkExpected(rs, expected2, "After conversion with VC2");
assertVectorized(shouldVectorize(), query);
@@ -593,8 +593,8 @@ public void testNonAcidToAcidVectorzied() throws Exception {
rs = runStatementOnDriver(query);
Assert.assertEquals("", 2, rs.size());
String[][] expected3 = {
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":3}\t6", "warehouse/t/000000_0"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":4}\t9", "warehouse/t/000000_0"}
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t6", "warehouse/t/000000_0"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t9", "warehouse/t/000000_0"}
};
checkExpected(rs, expected3, "After non-vectorized read");
Assert.assertEquals(0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912));
@@ -606,11 +606,11 @@ public void testNonAcidToAcidVectorzied() throws Exception {
query = "select ROW__ID, b from T where b > 0 order by a";
rs = runStatementOnDriver(query);
String[][] expected4 = {
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}","17"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":1}","4"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":2}","6"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":3}","8"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":4}","10"}
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}","17"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}","4"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}","6"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}","8"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}","10"}
};
checkExpected(rs, expected4, "After conversion with VC4");
assertVectorized(shouldVectorize(), query);
@@ -627,11 +627,11 @@ public void testNonAcidToAcidVectorzied() throws Exception {
query = "select ROW__ID, a, b, INPUT__FILE__NAME from T where b > 0 order by a, b";
rs = runStatementOnDriver(query);
String[][] expected5 = {//the row__ids are the same after compaction
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "warehouse/t/base_0000001/bucket_00000"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":1}\t2\t4", "warehouse/t/base_0000001/bucket_00000"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":2}\t5\t6", "warehouse/t/base_0000001/bucket_00000"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":3}\t6\t8", "warehouse/t/base_0000001/bucket_00000"},
- {"{\"transactionid\":0,\"bucketid\":536870912,\"rowid\":4}\t9\t10", "warehouse/t/base_0000001/bucket_00000"}
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "warehouse/t/base_0000001/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t2\t4", "warehouse/t/base_0000001/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t5\t6", "warehouse/t/base_0000001/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t6\t8", "warehouse/t/base_0000001/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t9\t10", "warehouse/t/base_0000001/bucket_00000"}
};
checkExpected(rs, expected5, "After major compaction");
//vectorized because there is INPUT__FILE__NAME
@@ -671,14 +671,14 @@ public void testCompactStatsGather() throws Exception {
String query = "select ROW__ID, p, q, a, b, INPUT__FILE__NAME from T order by p, q, a, b";
List<String> rs = runStatementOnDriver(query);
String[][] expected = {
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t4\t1", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4\t3", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/delta_0000001_0000001_0000/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/delta_0000001_0000001_0000/bucket_00000"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/delta_0000002_0000002_0000/bucket_00000"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/delta_0000002_0000002_0000/bucket_00000"}
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t4\t1", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4\t3", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/delta_0000002_0000002_0000/bucket_00000"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/delta_0000002_0000002_0000/bucket_00000"}
};
checkExpected(rs, expected, "insert data");
@@ -689,14 +689,14 @@ public void testCompactStatsGather() throws Exception {
query = "select ROW__ID, p, q, a, b, INPUT__FILE__NAME from T order by p, q, a, b";
rs = runStatementOnDriver(query);
String[][] expected2 = {
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t4\t1", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4\t3", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/base_0000002/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/base_0000002/bucket_00000"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/base_0000002/bucket_00000"},
- {"{\"transactionid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/base_0000002/bucket_00000"}
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t4\t1", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4\t3", "t/p=1/q=1/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000002_0000002_0000/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/base_0000002/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/base_0000002/bucket_00000"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/base_0000002/bucket_00000"},
+ {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/base_0000002/bucket_00000"}
};
checkExpected(rs, expected2, "after major compaction");
@@ -721,8 +721,8 @@ public void testDefault() throws Exception {
List<String> rs = runStatementOnDriver(query);
String[][] expected = {
//this proves data is written in Acid layout so T was made Acid
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/bucket_00000"},
- {"{\"transactionid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/bucket_00000"}
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/bucket_00000"}
};
checkExpected(rs, expected, "insert data");
}
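(Reader aid, not part of the patch: the bucketid values in the expected arrays above are BucketCodec-encoded writer/statement ids, as the comment near the top of this file's hunks notes. A small usage sketch of the same codec API the test already exercises:)

    int bucketProperty = 536870913;                                    // 0x20000001
    BucketCodec codec  = BucketCodec.determineVersion(bucketProperty); // V1
    int writerId = codec.decodeWriterId(bucketProperty);    // 0 -> bucket_00000
    int stmtId   = codec.decodeStatementId(bucketProperty); // 1 -> delta_..._0001 suffix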
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
index d673be4..7f6077c 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
@@ -264,7 +264,7 @@ private void setupData(DataFormat format) {
private FileSinkOperator getFileSink(AcidUtils.Operation writeType,
boolean dynamic,
- long txnId) throws IOException, HiveException {
+ long writeId) throws IOException, HiveException {
TableDesc tableDesc = null;
switch (writeType) {
case DELETE:
@@ -292,8 +292,8 @@ private FileSinkOperator getFileSink(AcidUtils.Operation writeType,
}
desc.setWriteType(writeType);
desc.setGatherStats(true);
- if (txnId > 0) {
- desc.setTableWriteId(txnId);
+ if (writeId > 0) {
+ desc.setTableWriteId(writeId);
}
if (writeType != AcidUtils.Operation.NOT_ACID) {
desc.setTableWriteId(1L);
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index 073b072..e534c9e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -2504,7 +2504,7 @@ public void testSetSearchArgument() throws Exception {
List<OrcProto.Type> types = new ArrayList<OrcProto.Type>();
OrcProto.Type.Builder builder = OrcProto.Type.newBuilder();
builder.setKind(OrcProto.Type.Kind.STRUCT)
- .addAllFieldNames(Arrays.asList("op", "otid", "bucket", "rowid", "ctid",
+ .addAllFieldNames(Arrays.asList("op", "owid", "bucket", "rowid", "cwid",
"row"))
.addAllSubtypes(Arrays.asList(1,2,3,4,5,6));
types.add(builder.build());
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
index 6f41d69..d8a7af8 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
@@ -508,7 +508,7 @@ static String getColumnNamesProperty() {
return "col1,ROW__ID";
}
static String getColumnTypesProperty() {
- return "string:struct";
+ return "string:struct";
}
}
@@ -1050,7 +1050,7 @@ static String getColumnNamesProperty() {
return "myint,mylong,mytext,myfloat,mydouble,ROW__ID";
}
static String getColumnTypesProperty() {
- return "int:bigint:string:float:double:struct";
+ return "int:bigint:string:float:double:struct";
}
}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java
index 709f021..3acc085 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java
@@ -57,8 +57,8 @@
*/
public class TestVectorizedOrcAcidRowBatchReader {
- private static final long NUM_ROWID_PER_OTID = 15000L;
- private static final long NUM_OTID = 10L;
+ private static final long NUM_ROWID_PER_OWID = 15000L;
+ private static final long NUM_OWID = 10L;
private JobConf conf;
private FileSystem fs;
private Path root;
@@ -118,16 +118,16 @@ public void setup() throws Exception {
.bucket(bucket)
.writingBase(false)
.minimumWriteId(1)
- .maximumWriteId(NUM_OTID)
+ .maximumWriteId(NUM_OWID)
.inspector(inspector)
.reporter(Reporter.NULL)
.recordIdColumn(1)
.finalDestination(root);
RecordUpdater updater = new OrcRecordUpdater(root, options);
// Create a single insert delta with 150,000 rows, with 15000 rowIds per original transaction id.
- for (long i = 1; i <= NUM_OTID; ++i) {
- for (long j = 0; j < NUM_ROWID_PER_OTID; ++j) {
- long payload = (i-1) * NUM_ROWID_PER_OTID + j;
+ for (long i = 1; i <= NUM_OWID; ++i) {
+ for (long j = 0; j < NUM_ROWID_PER_OWID; ++j) {
+ long payload = (i-1) * NUM_ROWID_PER_OWID + j;
updater.insert(i, new DummyRow(payload, j, i, bucket));
}
}
@@ -140,11 +140,11 @@ public void setup() throws Exception {
// Create a delete delta that has rowIds divisible by 2 but not by 3. This will produce
// a delete delta file with 50,000 delete events.
- long currTxnId = NUM_OTID + 1;
+ long currTxnId = NUM_OWID + 1;
options.minimumWriteId(currTxnId).maximumWriteId(currTxnId);
updater = new OrcRecordUpdater(root, options);
- for (long i = 1; i <= NUM_OTID; ++i) {
- for (long j = 0; j < NUM_ROWID_PER_OTID; j += 1) {
+ for (long i = 1; i <= NUM_OWID; ++i) {
+ for (long j = 0; j < NUM_ROWID_PER_OWID; j += 1) {
if (j % 2 == 0 && j % 3 != 0) {
updater.delete(currTxnId, new DummyRow(-1, j, i, bucket));
}
@@ -153,11 +153,11 @@ public void setup() throws Exception {
updater.close(false);
// Now, create a delete delta that has rowIds divisible by 3 but not by 2. This will produce
// a delete delta file with 25,000 delete events.
- currTxnId = NUM_OTID + 2;
+ currTxnId = NUM_OWID + 2;
options.minimumWriteId(currTxnId).maximumWriteId(currTxnId);
updater = new OrcRecordUpdater(root, options);
- for (long i = 1; i <= NUM_OTID; ++i) {
- for (long j = 0; j < NUM_ROWID_PER_OTID; j += 1) {
+ for (long i = 1; i <= NUM_OWID; ++i) {
+ for (long j = 0; j < NUM_ROWID_PER_OWID; j += 1) {
if (j % 2 != 0 && j % 3 == 0) {
updater.delete(currTxnId, new DummyRow(-1, j, i, bucket));
}
@@ -166,11 +166,11 @@ public void setup() throws Exception {
updater.close(false);
// Now, create a delete delta that has rowIds divisible by both 3 and 2. This will produce
// a delete delta file with 25,000 delete events.
- currTxnId = NUM_OTID + 3;
+ currTxnId = NUM_OWID + 3;
options.minimumWriteId(currTxnId).maximumWriteId(currTxnId);
updater = new OrcRecordUpdater(root, options);
- for (long i = 1; i <= NUM_OTID; ++i) {
- for (long j = 0; j < NUM_ROWID_PER_OTID; j += 1) {
+ for (long i = 1; i <= NUM_OWID; ++i) {
+ for (long j = 0; j < NUM_ROWID_PER_OWID; j += 1) {
if (j % 2 == 0 && j % 3 == 0) {
updater.delete(currTxnId, new DummyRow(-1, j, i, bucket));
}
@@ -235,10 +235,10 @@ private void testVectorizedOrcAcidRowBatchReader(String deleteEventRegistry) thr
for (int i = 0; i < vectorizedRowBatch.size; ++i) {
int idx = vectorizedRowBatch.selected[i];
long payload = col.vector[idx];
- long otid = (payload / NUM_ROWID_PER_OTID) + 1;
- long rowId = payload % NUM_ROWID_PER_OTID;
+ long owid = (payload / NUM_ROWID_PER_OWID) + 1;
+ long rowId = payload % NUM_ROWID_PER_OWID;
assertFalse(rowId % 2 == 0 || rowId % 3 == 0);
- assertTrue(otid != 5); // Check that txn#5 has been excluded.
+ assertTrue(owid != 5); // Check that writeid#5 has been excluded.
assertTrue(payload > previousPayload); // Check that the data is in sorted order.
previousPayload = payload;
}
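
The assertions above decode the synthetic payload written during setup: each inserted row stores payload = (owid - 1) * NUM_ROWID_PER_OWID + rowid, so the verification loop can recover both ids from the payload alone. A minimal, self-contained sketch of that encoding (class and method names are illustrative, not from the patch):

public final class PayloadCodec {
  // Mirrors the test constants: 15,000 row ids per write id.
  static final long NUM_ROWID_PER_OWID = 15000L;

  // setup(): payload stored for row (owid, rowid).
  static long encode(long owid, long rowid) {
    return (owid - 1) * NUM_ROWID_PER_OWID + rowid;
  }

  // verification loop: recover the original write id from the payload.
  static long owidOf(long payload) {
    return (payload / NUM_ROWID_PER_OWID) + 1;
  }

  // verification loop: recover the row id from the payload.
  static long rowidOf(long payload) {
    return payload % NUM_ROWID_PER_OWID;
  }

  public static void main(String[] args) {
    long payload = encode(7L, 123L);   // row 123 written by write id 7
    if (owidOf(payload) != 7L || rowidOf(payload) != 123L) {
      throw new IllegalStateException("payload round trip failed");
    }
  }
}
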
diff --git a/ql/src/test/queries/clientpositive/row__id.q b/ql/src/test/queries/clientpositive/row__id.q
index 6aaa40f..df2cb15 100644
--- a/ql/src/test/queries/clientpositive/row__id.q
+++ b/ql/src/test/queries/clientpositive/row__id.q
@@ -13,12 +13,12 @@ insert into hello_acid partition (load_date='2016-03-02') values (2, 2);
insert into hello_acid partition (load_date='2016-03-03') values (3, 3);
explain
-select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid;
+select tid from (select row__id.writeid as tid from hello_acid) sub order by tid;
-select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid;
+select tid from (select row__id.writeid as tid from hello_acid) sub order by tid;
explain
-select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3;
+select tid from (select row__id.writeid as tid from hello_acid) sub where tid = 3;
-select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3;
+select tid from (select row__id.writeid as tid from hello_acid) sub where tid = 3;
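
The row__id.q change above is the user-visible side of the rename: queries must now address ROW__ID.writeid instead of ROW__ID.transactionid. A hedged client-side sketch via JDBC; the connection URL is an assumption for illustration, and the hello_acid table comes from the test script above.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class RowIdWriteIdExample {
  public static void main(String[] args) throws Exception {
    // Assumed HiveServer2 endpoint; requires the Hive JDBC driver on the classpath.
    String url = "jdbc:hive2://localhost:10000/default";
    try (Connection conn = DriverManager.getConnection(url);
         Statement stmt = conn.createStatement();
         // Same projection as row__id.q, using the renamed struct field.
         ResultSet rs = stmt.executeQuery(
             "select row__id.writeid as tid from hello_acid order by tid")) {
      while (rs.next()) {
        System.out.println(rs.getLong("tid"));
      }
    }
  }
}
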
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part.q
index 09d7050..e2e356e 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part.q
@@ -19,7 +19,7 @@ set hive.llap.io.enabled=false;
--
-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Partitioned
-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
--- Also, we don't do regular EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+-- Also, we don't do regular EXPLAINs on ACID files because the write id causes Q file statistics differences...
-- Instead just one explain vectorization only detail
--
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_llap_io.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_llap_io.q
index a526f57..1e5f69b 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_llap_io.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_llap_io.q
@@ -20,7 +20,7 @@ set hive.llap.io.encode.enabled=true;
--
-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Partitioned
-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
--- Also, we don't do regular EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+-- Also, we don't do regular EXPLAINs on ACID files because the write id causes Q file statistics differences...
-- Instead just one explain vectorization only detail
--
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_update.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_update.q
index 17d461c..b58bb56 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_update.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_update.q
@@ -19,7 +19,7 @@ set hive.llap.io.enabled=false;
--
-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Partitioned
-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
--- Also, we don't do EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+-- Also, we don't do EXPLAINs on ACID files because the write id causes Q file statistics differences...
--
CREATE TABLE schema_evolution_data(insert_num int, boolean1 boolean, tinyint1 tinyint, smallint1 smallint, int1 int, bigint1 bigint, decimal1 decimal(38,18), float1 float, double1 double, string1 string, string2 string, date1 date, timestamp1 timestamp, boolean_str string, tinyint_str string, smallint_str string, int_str string, bigint_str string, decimal_str string, float_str string, double_str string, date_str string, timestamp_str string, filler string)
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_update_llap_io.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_update_llap_io.q
index 1866fc8..2f82583 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_update_llap_io.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_update_llap_io.q
@@ -20,7 +20,7 @@ set hive.llap.io.encode.enabled=true;
--
-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Partitioned
-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
--- Also, we don't do EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+-- Also, we don't do EXPLAINs on ACID files because the write id causes Q file statistics differences...
--
CREATE TABLE schema_evolution_data(insert_num int, boolean1 boolean, tinyint1 tinyint, smallint1 smallint, int1 int, bigint1 bigint, decimal1 decimal(38,18), float1 float, double1 double, string1 string, string2 string, date1 date, timestamp1 timestamp, boolean_str string, tinyint_str string, smallint_str string, int_str string, bigint_str string, decimal_str string, float_str string, double_str string, date_str string, timestamp_str string, filler string)
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table.q
index 23af572..19e7bc5 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table.q
@@ -18,7 +18,7 @@ set hive.llap.io.enabled=false;
--
-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Table
-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
--- Also, we don't do regular EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+-- Also, we don't do regular EXPLAINs on ACID files because the write id causes Q file statistics differences...
-- Instead just one explain vectorization only detail
--
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_llap_io.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_llap_io.q
index 66e6da4..71ab2e5 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_llap_io.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_llap_io.q
@@ -19,7 +19,7 @@ set hive.llap.io.encode.enabled=true;
--
-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Table
-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
--- Also, we don't do regular EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+-- Also, we don't do regular EXPLAINs on ACID files because the write id causes Q file statistics differences...
-- Instead just one explain vectorization only detail
--
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_update.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_update.q
index 4699cca..35c758a 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_update.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_update.q
@@ -18,7 +18,7 @@ set hive.llap.io.enabled=false;
--
-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Table
-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
--- Also, we don't do EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+-- Also, we don't do EXPLAINs on ACID files because the write id causes Q file statistics differences...
--
CREATE TABLE schema_evolution_data(insert_num int, boolean1 boolean, tinyint1 tinyint, smallint1 smallint, int1 int, bigint1 bigint, decimal1 decimal(38,18), float1 float, double1 double, string1 string, string2 string, date1 date, timestamp1 timestamp, boolean_str string, tinyint_str string, smallint_str string, int_str string, bigint_str string, decimal_str string, float_str string, double_str string, date_str string, timestamp_str string, filler string)
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_update_llap_io.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_update_llap_io.q
index 16df15a..b72ded6 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_update_llap_io.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_table_update_llap_io.q
@@ -19,7 +19,7 @@ set hive.llap.io.encode.enabled=true;
--
-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Table
-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
--- Also, we don't do EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+-- Also, we don't do EXPLAINs on ACID files because the write id causes Q file statistics differences...
--
CREATE TABLE schema_evolution_data(insert_num int, boolean1 boolean, tinyint1 tinyint, smallint1 smallint, int1 int, bigint1 bigint, decimal1 decimal(38,18), float1 float, double1 double, string1 string, string2 string, date1 date, timestamp1 timestamp, boolean_str string, tinyint_str string, smallint_str string, int_str string, bigint_str string, decimal_str string, float_str string, double_str string, date_str string, timestamp_str string, filler string)
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_part.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_part.q
index 1ad8650..fb86392 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_part.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_part.q
@@ -19,7 +19,7 @@ set hive.llap.io.enabled=false;
--
-- FILE VARIATION: ORC, ACID Vectorized, MapWork, Partitioned
-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
--- Also, we don't do regular EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+-- Also, we don't do regular EXPLAINs on ACID files because the write id causes Q file statistics differences...
-- Instead explain vectorization only detail
--
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_part_llap_io.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_part_llap_io.q
index e676346..7509d3d 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_part_llap_io.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_part_llap_io.q
@@ -20,7 +20,7 @@ set hive.llap.io.encode.enabled=true;
--
-- FILE VARIATION: ORC, ACID Vectorized, MapWork, Partitioned
-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
--- Also, we don't do regular EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+-- Also, we don't do regular EXPLAINs on ACID files because the write id causes Q file statistics differences...
-- Instead explain vectorization only detail
--
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_part_update.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_part_update.q
index 9a6bcaa..aba65c9 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_part_update.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_part_update.q
@@ -19,7 +19,7 @@ set hive.llap.io.enabled=false;
--
-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Partitioned
-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
--- Also, we don't do EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+-- Also, we don't do EXPLAINs on ACID files because the write id causes Q file statistics differences...
--
CREATE TABLE schema_evolution_data(insert_num int, boolean1 boolean, tinyint1 tinyint, smallint1 smallint, int1 int, bigint1 bigint, decimal1 decimal(38,18), float1 float, double1 double, string1 string, string2 string, date1 date, timestamp1 timestamp, boolean_str string, tinyint_str string, smallint_str string, int_str string, bigint_str string, decimal_str string, float_str string, double_str string, date_str string, timestamp_str string, filler string)
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_part_update_llap_io.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_part_update_llap_io.q
index 2b404ba..6191d34 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_part_update_llap_io.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_part_update_llap_io.q
@@ -20,7 +20,7 @@ set hive.llap.io.encode.enabled=true;
--
-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Partitioned
-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
--- Also, we don't do EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+-- Also, we don't do EXPLAINs on ACID files because the write id causes Q file statistics differences...
--
CREATE TABLE schema_evolution_data(insert_num int, boolean1 boolean, tinyint1 tinyint, smallint1 smallint, int1 int, bigint1 bigint, decimal1 decimal(38,18), float1 float, double1 double, string1 string, string2 string, date1 date, timestamp1 timestamp, boolean_str string, tinyint_str string, smallint_str string, int_str string, bigint_str string, decimal_str string, float_str string, double_str string, date_str string, timestamp_str string, filler string)
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_table.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_table.q
index 7e23d7f..e26a329 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_table.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_table.q
@@ -18,7 +18,7 @@ set hive.llap.io.enabled=false;
--
-- FILE VARIATION: ORC, ACID Vectorized, MapWork, Table
-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
--- Also, we don't do regular EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+-- Also, we don't do regular EXPLAINs on ACID files because the write id causes Q file statistics differences...
-- Instead explain vectorization only detail
--
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_table_llap_io.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_table_llap_io.q
index b599ed5..7ca5459 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_table_llap_io.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_table_llap_io.q
@@ -19,7 +19,7 @@ set hive.llap.io.encode.enabled=true;
--
-- FILE VARIATION: ORC, ACID Vectorized, MapWork, Table
-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
--- Also, we don't do regular EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+-- Also, we don't do regular EXPLAINs on ACID files because the write id causes Q file statistics differences...
-- Instead explain vectorization only detail
--
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_table_update.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_table_update.q
index 8174e38..65e68a6 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_table_update.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_table_update.q
@@ -18,7 +18,7 @@ set hive.llap.io.enabled=false;
--
-- FILE VARIATION: ORC, ACID Vectorized, MapWork, Table
-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
--- Also, we don't do EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+-- Also, we don't do EXPLAINs on ACID files because the write id causes Q file statistics differences...
--
CREATE TABLE schema_evolution_data(insert_num int, boolean1 boolean, tinyint1 tinyint, smallint1 smallint, int1 int, bigint1 bigint, decimal1 decimal(38,18), float1 float, double1 double, string1 string, string2 string, date1 date, timestamp1 timestamp, boolean_str string, tinyint_str string, smallint_str string, int_str string, bigint_str string, decimal_str string, float_str string, double_str string, date_str string, timestamp_str string, filler string)
diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_table_update_llap_io.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_table_update_llap_io.q
index 8e3ba40..f1d20e6 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_table_update_llap_io.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_table_update_llap_io.q
@@ -19,7 +19,7 @@ set hive.llap.io.encode.enabled=true;
--
-- FILE VARIATION: ORC, ACID Vectorized, MapWork, Table
-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
--- Also, we don't do EXPLAINs on ACID files because the transaction id causes Q file statistics differences...
+-- Also, we don't do EXPLAINs on ACID files because the write id causes Q file statistics differences...
--
CREATE TABLE schema_evolution_data(insert_num int, boolean1 boolean, tinyint1 tinyint, smallint1 smallint, int1 int, bigint1 bigint, decimal1 decimal(38,18), float1 float, double1 double, string1 string, string2 string, date1 date, timestamp1 timestamp, boolean_str string, tinyint_str string, smallint_str string, int_str string, bigint_str string, decimal_str string, float_str string, double_str string, date_str string, timestamp_str string, filler string)
diff --git a/ql/src/test/results/clientnegative/invalid_cast_from_binary_1.q.out b/ql/src/test/results/clientnegative/invalid_cast_from_binary_1.q.out
index acecbae..bce6dd1 100644
--- a/ql/src/test/results/clientnegative/invalid_cast_from_binary_1.q.out
+++ b/ql/src/test/results/clientnegative/invalid_cast_from_binary_1.q.out
@@ -6,4 +6,4 @@ POSTHOOK: query: create table tbl (a binary)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@tbl
-FAILED: SemanticException Line 0:-1 Wrong arguments 'a': No matching method for class org.apache.hadoop.hive.ql.udf.UDFToInteger with (binary). Possible choices: _FUNC_(bigint) _FUNC_(boolean) _FUNC_(decimal(38,18)) _FUNC_(double) _FUNC_(float) _FUNC_(smallint) _FUNC_(string) _FUNC_(struct) _FUNC_(timestamp) _FUNC_(tinyint) _FUNC_(void)
+FAILED: SemanticException Line 0:-1 Wrong arguments 'a': No matching method for class org.apache.hadoop.hive.ql.udf.UDFToInteger with (binary). Possible choices: _FUNC_(bigint) _FUNC_(boolean) _FUNC_(decimal(38,18)) _FUNC_(double) _FUNC_(float) _FUNC_(smallint) _FUNC_(string) _FUNC_(struct) _FUNC_(timestamp) _FUNC_(tinyint) _FUNC_(void)
diff --git a/ql/src/test/results/clientpositive/acid_subquery.q.out b/ql/src/test/results/clientpositive/acid_subquery.q.out
index 77bafe7..1dc1775 100644
--- a/ql/src/test/results/clientpositive/acid_subquery.q.out
+++ b/ql/src/test/results/clientpositive/acid_subquery.q.out
@@ -99,4 +99,4 @@ POSTHOOK: Output: default@target@p=1/q=3
POSTHOOK: Output: default@target@p=1/q=3
POSTHOOK: Output: default@target@p=2/q=2
POSTHOOK: Output: default@target@p=2/q=2
-POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(target)t.FieldSchema(name:ROW__ID, type:struct, comment:), (target)t.FieldSchema(name:p, type:int, comment:null), (target)t.FieldSchema(name:q, type:int, comment:null), ]
+POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(target)t.FieldSchema(name:ROW__ID, type:struct, comment:), (target)t.FieldSchema(name:p, type:int, comment:null), (target)t.FieldSchema(name:q, type:int, comment:null), ]
diff --git a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
index 629b05d..89b7169 100644
--- a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
+++ b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
@@ -124,11 +124,11 @@ STAGE PLANS:
predicate: (UDFToInteger(key)) IN (413, 43) (type: boolean)
Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
- expressions: ROW__ID (type: struct), key (type: string), concat(value, 'updated') (type: string), ds (type: string)
+ expressions: ROW__ID (type: struct), key (type: string), concat(value, 'updated') (type: string), ds (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
- key expressions: _col0 (type: struct)
+ key expressions: _col0 (type: struct)
sort order: +
Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
@@ -138,7 +138,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), '11' (type: string)
+ expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), '11' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
@@ -306,11 +306,11 @@ STAGE PLANS:
predicate: (key) IN ('1001', '213', '43') (type: boolean)
Statistics: Num rows: 20 Data size: 9100 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
- expressions: ROW__ID (type: struct), ds (type: string), hr (type: string)
+ expressions: ROW__ID (type: struct), ds (type: string), hr (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
- key expressions: _col0 (type: struct)
+ key expressions: _col0 (type: struct)
sort order: +
Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col1 (type: string), _col2 (type: string)
@@ -320,7 +320,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string)
+ expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
@@ -451,7 +451,7 @@ POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11
POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11
POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12
POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12
-POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(srcpart_acid)t.FieldSchema(name:ROW__ID, type:struct, comment:), (srcpart_acid)t.FieldSchema(name:ds, type:string, comment:null), (srcpart_acid)t.FieldSchema(name:hr, type:string, comment:null), ]
+POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(srcpart_acid)t.FieldSchema(name:ROW__ID, type:struct, comment:), (srcpart_acid)t.FieldSchema(name:ds, type:string, comment:null), (srcpart_acid)t.FieldSchema(name:hr, type:string, comment:null), ]
PREHOOK: query: select count(*) from srcpart_acid where ds='2008-04-08' and hr=='12'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart_acid
@@ -623,11 +623,11 @@ STAGE PLANS:
predicate: (UDFToInteger(key)) IN (413, 43) (type: boolean)
Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
- expressions: ROW__ID (type: struct), key (type: string), concat(value, 'updated') (type: string), ds (type: string)
+ expressions: ROW__ID (type: struct), key (type: string), concat(value, 'updated') (type: string), ds (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
- key expressions: _col0 (type: struct)
+ key expressions: _col0 (type: struct)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL
@@ -638,7 +638,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), '11' (type: string)
+ expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), '11' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
@@ -806,11 +806,11 @@ STAGE PLANS:
predicate: (key) IN ('1001', '213', '43') (type: boolean)
Statistics: Num rows: 20 Data size: 9100 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
- expressions: ROW__ID (type: struct), ds (type: string), hr (type: string)
+ expressions: ROW__ID (type: struct), ds (type: string), hr (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
- key expressions: _col0 (type: struct)
+ key expressions: _col0 (type: struct)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL
@@ -821,7 +821,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string)
+ expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
@@ -952,7 +952,7 @@ POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11
POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11
POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12
POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12
-POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(srcpart_acidb)t.FieldSchema(name:ROW__ID, type:struct, comment:), (srcpart_acidb)t.FieldSchema(name:ds, type:string, comment:null), (srcpart_acidb)t.FieldSchema(name:hr, type:string, comment:null), ]
+POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(srcpart_acidb)t.FieldSchema(name:ROW__ID, type:struct, comment:), (srcpart_acidb)t.FieldSchema(name:ds, type:string, comment:null), (srcpart_acidb)t.FieldSchema(name:hr, type:string, comment:null), ]
PREHOOK: query: select count(*) from srcpart_acidb where ds='2008-04-08' and hr=='12'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart_acidb
@@ -1123,7 +1123,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan Vectorization:
native: true
- vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct]
+ vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct]
Filter Vectorization:
className: VectorFilterOperator
native: true
@@ -1170,7 +1170,7 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 4
- dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string
+ dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string
partitionColumnCount: 0
scratchColumnTypeNames: [string]
Reduce Operator Tree:
@@ -1322,7 +1322,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan Vectorization:
native: true
- vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct]
+ vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct]
Filter Vectorization:
className: VectorFilterOperator
native: true
@@ -1368,7 +1368,7 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 3
- dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:string, VALUE._col1:string
+ dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:string, VALUE._col1:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
@@ -1487,7 +1487,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan Vectorization:
native: true
- vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct]
+ vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct]
Select Vectorization:
className: VectorSelectOperator
native: true
@@ -1528,7 +1528,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan Vectorization:
native: true
- vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct]
+ vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct]
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumnNums: [2, 3, 0, 1]
@@ -1626,7 +1626,7 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 3
- dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:string, VALUE._col1:string
+ dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:string, VALUE._col1:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
@@ -1649,7 +1649,7 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 5
- dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string, VALUE._col3:string
+ dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string, VALUE._col3:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
@@ -1736,7 +1736,7 @@ POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11
POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11
POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12
POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12
-POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(srcpart_acidv)t.FieldSchema(name:ROW__ID, type:struct, comment:), (srcpart_acidv)t.FieldSchema(name:ds, type:string, comment:null), (srcpart_acidv)t.FieldSchema(name:hr, type:string, comment:null), ]
+POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(srcpart_acidv)t.FieldSchema(name:ROW__ID, type:struct, comment:), (srcpart_acidv)t.FieldSchema(name:ds, type:string, comment:null), (srcpart_acidv)t.FieldSchema(name:hr, type:string, comment:null), ]
PREHOOK: query: select count(*) from srcpart_acidv where ds='2008-04-08' and hr=='12'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart_acidv
@@ -1907,7 +1907,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan Vectorization:
native: true
- vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct]
+ vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct]
Filter Vectorization:
className: VectorFilterOperator
native: true
@@ -1955,7 +1955,7 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 4
- dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string
+ dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string
partitionColumnCount: 0
scratchColumnTypeNames: [string]
Reduce Operator Tree:
@@ -2107,7 +2107,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan Vectorization:
native: true
- vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct]
+ vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct]
Filter Vectorization:
className: VectorFilterOperator
native: true
@@ -2154,7 +2154,7 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 3
- dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:string, VALUE._col1:string
+ dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:string, VALUE._col1:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
@@ -2274,7 +2274,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan Vectorization:
native: true
- vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct]
+ vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct]
Select Vectorization:
className: VectorSelectOperator
native: true
@@ -2315,7 +2315,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan Vectorization:
native: true
- vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct]
+ vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct]
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
keyColumnNums: [2, 3, 0, 1]
@@ -2413,7 +2413,7 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 3
- dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:string, VALUE._col1:string
+ dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:string, VALUE._col1:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
@@ -2436,7 +2436,7 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 5
- dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string, VALUE._col3:string
+ dataColumns: KEY.reducesinkkey0:struct, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string, VALUE._col3:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
@@ -2531,7 +2531,7 @@ POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11
POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11
POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12
POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12
-POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(srcpart_acidvb)t.FieldSchema(name:ROW__ID, type:struct, comment:), (srcpart_acidvb)t.FieldSchema(name:ds, type:string, comment:null), (srcpart_acidvb)t.FieldSchema(name:hr, type:string, comment:null), ]
+POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(srcpart_acidvb)t.FieldSchema(name:ROW__ID, type:struct, comment:), (srcpart_acidvb)t.FieldSchema(name:ds, type:string, comment:null), (srcpart_acidvb)t.FieldSchema(name:hr, type:string, comment:null), ]
PREHOOK: query: select count(*) from srcpart_acidvb where ds='2008-04-08' and hr=='12'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart_acidvb
diff --git a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
index 11a99db..64e5b17 100644
--- a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
+++ b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
@@ -504,11 +504,11 @@ STAGE PLANS:
predicate: ((b = 4294967363L) and (t < 100Y)) (type: boolean)
Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: ROW__ID (type: struct), t (type: tinyint), si (type: smallint), i (type: int)
+ expressions: ROW__ID (type: struct), t (type: tinyint), si (type: smallint), i (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: struct)
+ key expressions: _col0 (type: struct)
sort order: +
Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: tinyint), _col2 (type: smallint), _col3 (type: int)
@@ -518,7 +518,7 @@ STAGE PLANS:
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: int)
+ expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
@@ -543,12 +543,12 @@ POSTHOOK: query: select ROW__ID, t, si, i from over10k_orc_bucketed where b = 42
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k_orc_bucketed
#### A masked pattern was here ####
-{"transactionid":0,"bucketid":536870912,"rowid":36} -3 344 65733
-{"transactionid":0,"bucketid":536870912,"rowid":300} -3 344 65733
-{"transactionid":0,"bucketid":536936448,"rowid":82} 5 501 65585
-{"transactionid":0,"bucketid":536936448,"rowid":344} 5 501 65585
-{"transactionid":0,"bucketid":537067520,"rowid":211} 35 463 65646
-{"transactionid":0,"bucketid":537067520,"rowid":488} 35 463 65646
+{"writeid":0,"bucketid":536870912,"rowid":36} -3 344 65733
+{"writeid":0,"bucketid":536870912,"rowid":300} -3 344 65733
+{"writeid":0,"bucketid":536936448,"rowid":82} 5 501 65585
+{"writeid":0,"bucketid":536936448,"rowid":344} 5 501 65585
+{"writeid":0,"bucketid":537067520,"rowid":211} 35 463 65646
+{"writeid":0,"bucketid":537067520,"rowid":488} 35 463 65646
PREHOOK: query: explain update over10k_orc_bucketed set i = 0 where b = 4294967363 and t < 100
PREHOOK: type: QUERY
POSTHOOK: query: explain update over10k_orc_bucketed set i = 0 where b = 4294967363 and t < 100
@@ -577,11 +577,11 @@ STAGE PLANS:
predicate: ((b = 4294967363L) and (t < 100Y)) (type: boolean)
Statistics: Num rows: 2 Data size: 674 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: ROW__ID (type: struct), t (type: tinyint), si (type: smallint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
+ expressions: ROW__ID (type: struct), t (type: tinyint), si (type: smallint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11
Statistics: Num rows: 2 Data size: 834 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: struct)
+ key expressions: _col0 (type: struct)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 2 Data size: 834 Basic stats: COMPLETE Column stats: COMPLETE
@@ -592,7 +592,7 @@ STAGE PLANS:
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), 0 (type: int), 4294967363L (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: boolean), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: decimal(4,2)), VALUE._col9 (type: binary)
+ expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), 0 (type: int), 4294967363L (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: boolean), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: decimal(4,2)), VALUE._col9 (type: binary)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
Statistics: Num rows: 2 Data size: 834 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
@@ -639,12 +639,12 @@ POSTHOOK: query: select ROW__ID, t, si, i from over10k_orc_bucketed where b = 42
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k_orc_bucketed
#### A masked pattern was here ####
-{"transactionid":### Masked txnid ###,"bucketid":536870912,"rowid":0} -3 344 0
-{"transactionid":### Masked txnid ###,"bucketid":536870912,"rowid":1} -3 344 0
-{"transactionid":### Masked txnid ###,"bucketid":536936448,"rowid":0} 5 501 0
-{"transactionid":### Masked txnid ###,"bucketid":536936448,"rowid":1} 5 501 0
-{"transactionid":### Masked txnid ###,"bucketid":537067520,"rowid":0} 35 463 0
-{"transactionid":### Masked txnid ###,"bucketid":537067520,"rowid":1} 35 463 0
+{"writeid":### Masked writeid ###,"bucketid":536870912,"rowid":0} -3 344 0
+{"writeid":### Masked writeid ###,"bucketid":536870912,"rowid":1} -3 344 0
+{"writeid":### Masked writeid ###,"bucketid":536936448,"rowid":0} 5 501 0
+{"writeid":### Masked writeid ###,"bucketid":536936448,"rowid":1} 5 501 0
+{"writeid":### Masked writeid ###,"bucketid":537067520,"rowid":0} 35 463 0
+{"writeid":### Masked writeid ###,"bucketid":537067520,"rowid":1} 35 463 0
PREHOOK: query: explain select ROW__ID, count(*) from over10k_orc_bucketed group by ROW__ID having count(*) > 1
PREHOOK: type: QUERY
POSTHOOK: query: explain select ROW__ID, count(*) from over10k_orc_bucketed group by ROW__ID having count(*) > 1
@@ -667,19 +667,19 @@ STAGE PLANS:
alias: over10k_orc_bucketed
Statistics: Num rows: 2098 Data size: 622340 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: ROW__ID (type: struct)
+ expressions: ROW__ID (type: struct)
outputColumnNames: ROW__ID
Statistics: Num rows: 2098 Data size: 622340 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
- keys: ROW__ID (type: struct)
+ keys: ROW__ID (type: struct)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1049 Data size: 88116 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: struct)
+ key expressions: _col0 (type: struct)
sort order: +
- Map-reduce partition columns: _col0 (type: struct)
+ Map-reduce partition columns: _col0 (type: struct)
Statistics: Num rows: 1049 Data size: 88116 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Execution mode: llap
@@ -689,7 +689,7 @@ STAGE PLANS:
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: struct)
+ keys: KEY._col0 (type: struct)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 1049 Data size: 88116 Basic stats: COMPLETE Column stats: COMPLETE
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out
index 4a8192d..f49ecb0 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out
@@ -64,7 +64,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: a (type: int)
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- value expressions: ROW__ID (type: struct)
+ value expressions: ROW__ID (type: struct)
Execution mode: llap
LLAP IO: may be used (ACID table)
Map 8
@@ -108,11 +108,11 @@ STAGE PLANS:
predicate: ((_col0 = _col5) and (_col5 > 8)) (type: boolean)
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col4 (type: struct)
+ expressions: _col4 (type: struct)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: struct)
+ key expressions: _col0 (type: struct)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
@@ -120,11 +120,11 @@ STAGE PLANS:
predicate: ((_col0 = _col5) and (_col5 <= 8)) (type: boolean)
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col4 (type: struct), _col0 (type: int)
+ expressions: _col4 (type: struct), _col0 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: struct)
+ key expressions: _col0 (type: struct)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
@@ -133,19 +133,19 @@ STAGE PLANS:
predicate: (_col0 = _col5) (type: boolean)
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col4 (type: struct)
+ expressions: _col4 (type: struct)
outputColumnNames: _col4
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
- keys: _col4 (type: struct)
+ keys: _col4 (type: struct)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: struct)
+ key expressions: _col0 (type: struct)
sort order: +
- Map-reduce partition columns: _col0 (type: struct)
+ Map-reduce partition columns: _col0 (type: struct)
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Filter Operator
@@ -164,7 +164,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct)
+ expressions: KEY.reducesinkkey0 (type: struct)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -180,7 +180,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 7 (type: int)
+ expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 7 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -197,7 +197,7 @@ STAGE PLANS:
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: struct)
+ keys: KEY._col0 (type: struct)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
@@ -658,7 +658,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: a (type: int)
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- value expressions: ROW__ID (type: struct)
+ value expressions: ROW__ID (type: struct)
Execution mode: llap
LLAP IO: may be used (ACID table)
Reducer 3
@@ -676,11 +676,11 @@ STAGE PLANS:
predicate: ((_col0 = _col5) and (_col5 > 8)) (type: boolean)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col4 (type: struct)
+ expressions: _col4 (type: struct)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: struct)
+ key expressions: _col0 (type: struct)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -688,11 +688,11 @@ STAGE PLANS:
predicate: ((_col0 = _col5) and (_col5 <= 8)) (type: boolean)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col4 (type: struct), _col0 (type: int)
+ expressions: _col4 (type: struct), _col0 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: struct)
+ key expressions: _col0 (type: struct)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -701,19 +701,19 @@ STAGE PLANS:
predicate: (_col0 = _col5) (type: boolean)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col4 (type: struct)
+ expressions: _col4 (type: struct)
outputColumnNames: _col4
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
- keys: _col4 (type: struct)
+ keys: _col4 (type: struct)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: struct)
+ key expressions: _col0 (type: struct)
sort order: +
- Map-reduce partition columns: _col0 (type: struct)
+ Map-reduce partition columns: _col0 (type: struct)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Filter Operator
@@ -732,7 +732,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct)
+ expressions: KEY.reducesinkkey0 (type: struct)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -748,7 +748,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 7 (type: int)
+ expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: int), 7 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -765,7 +765,7 @@ STAGE PLANS:
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: struct)
+ keys: KEY._col0 (type: struct)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -1149,7 +1149,7 @@ POSTHOOK: Lineage: customer.name SIMPLE [(new_customer_stage)stage.FieldSchema(n
POSTHOOK: Lineage: customer.sk EXPRESSION [(new_customer_stage)stage.FieldSchema(name:name, type:string, comment:null), ]
POSTHOOK: Lineage: customer.source_pk SIMPLE [(new_customer_stage)stage.FieldSchema(name:source_pk, type:int, comment:null), ]
POSTHOOK: Lineage: customer.state SIMPLE [(new_customer_stage)stage.FieldSchema(name:state, type:string, comment:null), ]
-POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(customer)customer.FieldSchema(name:ROW__ID, type:struct, comment:), ]
+POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(customer)customer.FieldSchema(name:ROW__ID, type:struct, comment:), ]
PREHOOK: query: select * from customer order by source_pk, is_current
PREHOOK: type: QUERY
PREHOOK: Input: type2_scd_helper@customer
diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
index 7498485..6a97736 100644
--- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
@@ -99,11 +99,11 @@ STAGE PLANS:
predicate: (key = 'foo') (type: boolean)
Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ROW__ID (type: struct)
+ expressions: ROW__ID (type: struct)
outputColumnNames: _col0
Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: struct)
+ key expressions: _col0 (type: struct)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
@@ -113,7 +113,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string)
+ expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -194,11 +194,11 @@ STAGE PLANS:
predicate: (key = 'foo') (type: boolean)
Statistics: Num rows: 5 Data size: 1355 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
- expressions: ROW__ID (type: struct), ds (type: string)
+ expressions: ROW__ID (type: struct), ds (type: string)
outputColumnNames: _col0, _col3
Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
- key expressions: _col0 (type: struct)
+ key expressions: _col0 (type: struct)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
@@ -209,7 +209,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), VALUE._col1 (type: string)
+ expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), VALUE._col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
@@ -385,11 +385,11 @@ STAGE PLANS:
predicate: (key = 'foo') (type: boolean)
Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ROW__ID (type: struct)
+ expressions: ROW__ID (type: struct)
outputColumnNames: _col0
Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: struct)
+ key expressions: _col0 (type: struct)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
@@ -399,7 +399,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string)
+ expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -480,11 +480,11 @@ STAGE PLANS:
predicate: (key = 'foo') (type: boolean)
Statistics: Num rows: 5 Data size: 1355 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
- expressions: ROW__ID (type: struct), ds (type: string)
+ expressions: ROW__ID (type: struct), ds (type: string)
outputColumnNames: _col0, _col3
Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
- key expressions: _col3 (type: string), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+ key expressions: _col3 (type: string), '_bucket_number' (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +++
Map-reduce partition columns: _col3 (type: string)
Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
@@ -494,7 +494,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), KEY._col3 (type: string), KEY.'_bucket_number' (type: string)
+ expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), KEY._col3 (type: string), KEY.'_bucket_number' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, '_bucket_number'
Statistics: Num rows: 5 Data size: 1790 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
@@ -680,11 +680,11 @@ STAGE PLANS:
predicate: (key = 'foo') (type: boolean)
Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+ expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
outputColumnNames: _col0
Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
@@ -694,7 +694,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), 11 (type: int)
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), 11 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -776,11 +776,11 @@ STAGE PLANS:
predicate: (key = 'foo') (type: boolean)
Statistics: Num rows: 5 Data size: 455 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
- expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), hr (type: int)
+ expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), hr (type: int)
outputColumnNames: _col0, _col4
Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
- key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
@@ -791,7 +791,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), VALUE._col2 (type: int)
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), VALUE._col2 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
@@ -902,11 +902,11 @@ STAGE PLANS:
predicate: (value = 'bar') (type: boolean)
Statistics: Num rows: 5 Data size: 1409 Basic stats: PARTIAL Column stats: PARTIAL
Select Operator
- expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), ds (type: string), hr (type: int)
+ expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), ds (type: string), hr (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 5 Data size: 1409 Basic stats: PARTIAL Column stats: PARTIAL
Reduce Output Operator
- key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 5 Data size: 1409 Basic stats: PARTIAL Column stats: PARTIAL
@@ -917,7 +917,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: string), VALUE._col1 (type: int)
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: string), VALUE._col1 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 5 Data size: 1409 Basic stats: PARTIAL Column stats: PARTIAL
File Output Operator
@@ -1100,11 +1100,11 @@ STAGE PLANS:
predicate: (key = 'foo') (type: boolean)
Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+ expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
outputColumnNames: _col0
Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
@@ -1114,7 +1114,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), 11 (type: int)
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), 11 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 5 Data size: 469 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1196,11 +1196,11 @@ STAGE PLANS:
predicate: (key = 'foo') (type: boolean)
Statistics: Num rows: 5 Data size: 455 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
- expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), hr (type: int)
+ expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), hr (type: int)
outputColumnNames: _col0, _col4
Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
- key expressions: '2008-04-08' (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+ key expressions: '2008-04-08' (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: ++++
Map-reduce partition columns: '2008-04-08' (type: string), _col4 (type: int)
Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
@@ -1210,7 +1210,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string)
+ expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
Statistics: Num rows: 5 Data size: 1360 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
@@ -1322,11 +1322,11 @@ STAGE PLANS:
predicate: (value = 'bar') (type: boolean)
Statistics: Num rows: 5 Data size: 1409 Basic stats: PARTIAL Column stats: PARTIAL
Select Operator
- expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), ds (type: string), hr (type: int)
+ expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), ds (type: string), hr (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 5 Data size: 1409 Basic stats: PARTIAL Column stats: PARTIAL
Reduce Output Operator
- key expressions: _col1 (type: string), _col2 (type: int), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+ key expressions: _col1 (type: string), _col2 (type: int), '_bucket_number' (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: ++++
Map-reduce partition columns: _col1 (type: string), _col2 (type: int)
Statistics: Num rows: 5 Data size: 1409 Basic stats: PARTIAL Column stats: PARTIAL
@@ -1336,7 +1336,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY._col1 (type: string), KEY._col2 (type: int), KEY.'_bucket_number' (type: string)
+ expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), KEY._col1 (type: string), KEY._col2 (type: int), KEY.'_bucket_number' (type: string)
outputColumnNames: _col0, _col1, _col2, '_bucket_number'
Statistics: Num rows: 5 Data size: 1409 Basic stats: PARTIAL Column stats: PARTIAL
File Output Operator
@@ -1520,11 +1520,11 @@ STAGE PLANS:
predicate: (key = 'foo') (type: boolean)
Statistics: Num rows: 5 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
- expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: string), ds (type: string), hr (type: int)
+ expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), key (type: string), ds (type: string), hr (type: int)
outputColumnNames: _col0, _col1, _col3, _col4
Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
- key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+ key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: ++++
Map-reduce partition columns: _col3 (type: string), _col4 (type: int)
Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
@@ -1535,7 +1535,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY._col0 (type: struct