diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 772113acda..3bc0cb4d0f 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -456,7 +456,9 @@ minillap.query.files=acid_bucket_pruning.q,\ llap_stats.q,\ multi_count_distinct_null.q -minillaplocal.query.files=acid_globallimit.q,\ +minillaplocal.query.files=\ + acid_no_buckets.q, \ + acid_globallimit.q,\ acid_vectorization_missing_cols.q,\ alter_merge_stats_orc.q,\ auto_join30.q,\ diff --git metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java index 023d703543..ccd8ca1461 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java @@ -98,12 +98,25 @@ private void handleAlterTableTransactionalProp(PreAlterTableEvent context) throw // that will use it down below. } } + Table oldTable = context.getOldTable(); + String oldTransactionalValue = null; + String oldTransactionalPropertiesValue = null; + for (String key : oldTable.getParameters().keySet()) { + if (hive_metastoreConstants.TABLE_IS_TRANSACTIONAL.equalsIgnoreCase(key)) { + oldTransactionalValue = oldTable.getParameters().get(key); + } + if (hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES.equalsIgnoreCase(key)) { + oldTransactionalPropertiesValue = oldTable.getParameters().get(key); + } + } + if (transactionalValuePresent) { //normalize prop name parameters.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, transactionalValue); } - if ("true".equalsIgnoreCase(transactionalValue)) { - if (!conformToAcid(newTable)) { + if ("true".equalsIgnoreCase(transactionalValue) && !"true".equalsIgnoreCase(oldTransactionalValue)) { + //only need to check conformance if alter table enabled aicd + if (!conformToAcid(newTable, false)) { throw new MetaException("The table must be bucketed and stored using an ACID compliant" + " format (such as ORC)"); } @@ -115,17 +128,6 @@ private void handleAlterTableTransactionalProp(PreAlterTableEvent context) throw hasValidTransactionalValue = true; } - Table oldTable = context.getOldTable(); - String oldTransactionalValue = null; - String oldTransactionalPropertiesValue = null; - for (String key : oldTable.getParameters().keySet()) { - if (hive_metastoreConstants.TABLE_IS_TRANSACTIONAL.equalsIgnoreCase(key)) { - oldTransactionalValue = oldTable.getParameters().get(key); - } - if (hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES.equalsIgnoreCase(key)) { - oldTransactionalPropertiesValue = oldTable.getParameters().get(key); - } - } if (oldTransactionalValue == null ? 
transactionalValue == null @@ -194,7 +196,9 @@ private void handleCreateTableTransactionalProp(PreCreateTableEvent context) thr } if ("true".equalsIgnoreCase(transactionalValue)) { - if (!conformToAcid(newTable)) { + if (!conformToAcid(newTable, true)) { + //todo: fix this later - .q.out file noise + //throw new MetaException("The table must be (bucketed or default) and stored using an ACID compliant" + throw new MetaException("The table must be bucketed and stored using an ACID compliant" + " format (such as ORC)"); } @@ -214,11 +218,20 @@ private void handleCreateTableTransactionalProp(PreCreateTableEvent context) thr throw new MetaException("'transactional' property of TBLPROPERTIES may only have value 'true'"); } - // Check if table is bucketed and InputFormatClass/OutputFormatClass should implement - // AcidInputFormat/AcidOutputFormat - private boolean conformToAcid(Table table) throws MetaException { + /** + * Check if table is bucketed and InputFormatClass/OutputFormatClass should implement + * AcidInputFormat/AcidOutputFormat + * @param isCreate true if create, false if alter table + */ + private boolean conformToAcid(Table table, boolean isCreate) throws MetaException { StorageDescriptor sd = table.getSd(); if (sd.getBucketColsSize() < 1) { + if(isCreate && DEFAULT_TRANSACTIONAL_PROPERTY.equalsIgnoreCase( + table.getParameters().get(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES))) { + //only acid 2.0 supports creating new un-bucketed tables. Converting non acid to acid + //table via alter table is not supported (HIVE-17214) + return true; + } return false; } diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index 9c9d4e7897..b3ef9169c2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -418,8 +418,8 @@ " does not support these operations."), VALUES_TABLE_CONSTRUCTOR_NOT_SUPPORTED(10296, "Values clause with table constructor not yet supported"), - ACID_OP_ON_NONACID_TABLE(10297, "Attempt to do update or delete on table {0} that does not use " + - "an AcidOutputFormat or is not bucketed", true), + ACID_OP_ON_NONACID_TABLE(10297, "Attempt to do update or delete on table {0} that is " + + "not transactional", true), ACID_NO_SORTED_BUCKETS(10298, "ACID insert, update, delete not supported on tables that are " + "sorted, table {0}", true), ALTER_TABLE_TYPE_PARTIAL_PARTITION_SPEC_NO_SUPPORTED(10299, diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 25ad1e9acb..b2c95e57de 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConfUtil; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.io.AcidUtils; @@ -285,6 +286,7 @@ public Stat getStat() { private transient int numFiles; protected transient boolean multiFileSpray; protected transient final Map bucketMap = new HashMap(); + private transient boolean isBucketed = false; private transient ObjectInspector[] partitionObjectInspectors; protected transient HivePartitioner prtner; @@ -345,6 +347,7 @@ protected void 
initializeOp(Configuration hconf) throws HiveException { isNativeTable = !conf.getTableInfo().isNonNative(); isTemporary = conf.isTemporary(); multiFileSpray = conf.isMultiFileSpray(); + this.isBucketed = hconf.getInt(hive_metastoreConstants.BUCKET_COUNT, 0) > 0; totalFiles = conf.getTotalFiles(); numFiles = conf.getNumFiles(); dpCtx = conf.getDynPartCtx(); @@ -791,9 +794,28 @@ public void process(Object row, int tag) throws HiveException { * Hive.copyFiles() will make one of them bucket_N_copy_M in the final location. The * reset of acid (read path) doesn't know how to handle copy_N files except for 'original' * files (HIVE-16177)*/ + int writerId = -1; + if(!isBucketed) { + assert !multiFileSpray; + assert writerOffset == 0; + /*For un-bucketed tables, ROW__IDs with different 'bucketNum' values can be written to + * the same bucketN file. N in this case is writerId and there is no relationship + * between the file name and any property of the data in it. With acid 2.0 and late + * update split this will be true for Inserts (derived from update) as well. Once we + * move to early update split, then each FileSinkOperator will either see all Delete or + * all Insert events and thus Inserts will be written to bucketN file such that all + * ROW__ID.bucketId indeed contain writerId=N. + * todo: Should we name the file 00000 or something to make it less confusing? + * Since taskId is unique (at least per statementId and thus + * per [delete_]delta_x_y_stmtId/) there will not be any copy_N files. + * + * Oops: compactor relies on bucketN being correct with respect to data in the file for + * bucketed tables. What does it do for non bucketed tables?*/ + writerId = Integer.parseInt(Utilities.getTaskIdFromFilename(taskId)); + } fpaths.updaters[writerOffset] = HiveFileFormatUtils.getAcidRecordUpdater( - jc, conf.getTableInfo(), bucketNum, conf, fpaths.outPaths[writerOffset], - rowInspector, reporter, 0); + jc, conf.getTableInfo(), writerId >= 0 ? 
writerId : bucketNum, conf, + fpaths.outPaths[writerOffset], rowInspector, reporter, 0); if (LOG.isDebugEnabled()) { LOG.debug("Created updater for bucket number " + bucketNum + " using file " + fpaths.outPaths[writerOffset]); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 17f3d02993..375e6358a6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -1011,7 +1011,7 @@ public String toString() { static class ACIDSplitStrategy implements SplitStrategy { private Path dir; private List deltas; - private boolean[] covered; + private boolean[] covered;//true means this bucket has a base (and maybe some deltas) private int numBuckets; private AcidOperationalProperties acidOperationalProperties; @@ -1633,7 +1633,7 @@ private long computeProjectionSize(List fileTypes, pathFutures.add(ecs.submit(fileGenerator)); } - boolean isTransactionalTableScan = + boolean isTransactionalTableScan =//this never seems to be set correctly HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN); boolean isSchemaEvolution = HiveConf.getBoolVar(conf, ConfVars.HIVE_SCHEMA_EVOLUTION); TypeDescription readerSchema = diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java index 97c4e3df61..f6d6323003 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java @@ -22,6 +22,8 @@ import java.util.Map; import java.util.TreeMap; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.AcidOutputFormat; import org.apache.hadoop.hive.ql.io.BucketCodec; @@ -68,7 +70,6 @@ private final RecordIdentifier maxKey; // an extra value so that we can return it while reading ahead private OrcStruct extraValue; - /** * A RecordIdentifier extended with the current transaction id. This is the * key of our merge sort with the originalTransaction, bucket, and rowId @@ -685,6 +686,7 @@ private KeyInterval discoverOriginalKeyBounds(Reader reader, int bucket, break; } } + //todo: what is "bucket" for unbucketed tables? 0? if (rowOffset > 0) { minKey = new RecordIdentifier(0, bucketProperty, rowOffset - 1); } @@ -740,7 +742,6 @@ private KeyInterval discoverKeyBounds(Reader reader, */ static Reader.Options createEventOptions(Reader.Options options) { Reader.Options result = options.clone(); - //result.range(options.getOffset(), Long.MAX_VALUE);WTF? result.include(options.getInclude()); // slide the column names down by 6 for the name array @@ -820,6 +821,33 @@ Path getBucketPath() { this.offset = options.getOffset(); this.length = options.getLength(); this.validTxnList = validTxnList; + /** + * @since Hive 3.0 + * With split update (HIVE-14035) we have base/, delta/ and delete_delta/ - the latter only + * has Delete events and the others only have Insert events. Thus {@link #baseReader} is + * a split of a file in base/ or delta/. + * + * For Compaction, each split (for now) is a logical bucket, i.e. 
all files from base/ + delta(s)/ + * for a given bucket ID and delete_delta(s)/ + * + * For bucketed tables, the data files are named bucket_N and all rows in this file are such + * that {@link org.apache.hadoop.hive.ql.io.BucketCodec#decodeWriterId(int)} of + * {@link RecordIdentifier#getBucketProperty()} is N. This is currently true for all types of + * files but may not be true for delete_delta/ files in the future. + * + * For un-bucketed tables, the system is designed so that it works when there is no relationship + * between data file name (bucket_N) and the value of {@link RecordIdentifier#getBucketProperty()}. + * (Later this may be optimized to take advantage of situations where it is known that + * bucket_N matches bucketProperty().) This implies that for a given {@link baseReader} all + * files in delete_delta/ have to be opened ({@link ReaderPair} created). + * + * Compactor for un-bucketed tables works exactly the same as for bucketed ones though it + * should be optimized. + * + * In both cases, Compactor can be changed so that Minor compaction is run very often and only + * compacts delete_delta/. Major compaction can do what it does now. + */ + boolean isBucketed = conf.getInt(hive_metastoreConstants.BUCKET_COUNT, 0) > 0; TypeDescription typeDescr = OrcInputFormat.getDesiredRowTypeDescr(conf, true, Integer.MAX_VALUE); @@ -868,35 +896,30 @@ Path getBucketPath() { baseReader = pair.getRecordReader(); } - // we always want to read all of the deltas - eventOptions.range(0, Long.MAX_VALUE); if (deltaDirectory != null) { + /*whatever SARG may be applicable to base, it's not applicable to delete_delta since it has no + * user columns + * todo: we should make a SARG to push down min/max key to delete_delta*/ + Reader.Options deltaEventOptions = eventOptions.clone() + .searchArgument(null, null).range(0, Long.MAX_VALUE); for(Path delta: deltaDirectory) { if(!mergerOptions.isCompacting() && !AcidUtils.isDeleteDelta(delta)) { //all inserts should be in baseReader for normal read so this should always be delete delta if not compacting throw new IllegalStateException(delta + " is not delete delta and is not compacting."); } ReaderKey key = new ReaderKey(); - Path deltaFile = AcidUtils.createBucketFile(delta, bucket); AcidUtils.ParsedDelta deltaDir = AcidUtils.parsedDelta(delta); - FileSystem fs = deltaFile.getFileSystem(conf); - long length = OrcAcidUtils.getLastFlushLength(fs, deltaFile); - if (length != -1 && fs.exists(deltaFile)) { - Reader deltaReader = OrcFile.createReader(deltaFile, - OrcFile.readerOptions(conf).maxLength(length)); - Reader.Options deltaEventOptions = null; - if(eventOptions.getSearchArgument() != null) { - // Turn off the sarg before pushing it to delta.
We never want to push a sarg to a delta as - // it can produce wrong results (if the latest valid version of the record is filtered out by - // the sarg) or ArrayOutOfBounds errors (when the sarg is applied to a delete record) - // unless the delta only has insert events - AcidStats acidStats = OrcAcidUtils.parseAcidStats(deltaReader); - if(acidStats.deletes > 0 || acidStats.updates > 0) { - deltaEventOptions = eventOptions.clone().searchArgument(null, null); - } + for (Path deltaFile : getDeltaFiles(delta, bucket, conf, mergerOptions, isBucketed)) { + FileSystem fs = deltaFile.getFileSystem(conf); + if(!fs.exists(deltaFile)) { + continue; } + //only streaming ingest can produce side files and only on files with insert events + assert !fs.exists(OrcAcidUtils.getSideFile(deltaFile)) : "Unexpected file found: " + + OrcAcidUtils.getSideFile(deltaFile); + Reader deltaReader = OrcFile.createReader(deltaFile, OrcFile.readerOptions(conf)); ReaderPairAcid deltaPair = new ReaderPairAcid(key, deltaReader, minKey, maxKey, - deltaEventOptions != null ? deltaEventOptions : eventOptions, deltaDir.getStatementId()); + deltaEventOptions, deltaDir.getStatementId()); if (deltaPair.nextRecord() != null) { readers.put(key, deltaPair); } @@ -921,6 +944,58 @@ Path getBucketPath() { } } + /** + * This determines the set of {@link ReaderPairAcid} to create. + * For unbucketed tables {@code bucket} can be thought of as a write tranche. + */ + static Path[] getDeltaFiles(Path deltaDirectory, int bucket, Configuration conf, Options mergerOptions, boolean isBucketed) throws IOException { + if(isBucketed) { + /** + * for bucketed tables (for now) we always trust that the N in bucketN file name means that + * all records have {@link RecordIdentifier#getBucketProperty()} encoding bucketId = N. This + * means that a delete event in bucketN can only modify an insert in another bucketN file for + * the same N. (Down the road we may trust it only in certain delta dirs) + * + * Compactor takes all types of deltas for a given bucket. For regular read, any file that + * contains (only) insert events is treated as base and only + * delete_delta/ are treated as deltas. + */ + assert (!mergerOptions.isCompacting && + deltaDirectory.getName().startsWith(AcidUtils.DELETE_DELTA_PREFIX) + ) || mergerOptions.isCompacting : "Unexpected delta: " + deltaDirectory; + Path deltaFile = AcidUtils.createBucketFile(deltaDirectory, bucket); + return new Path[]{deltaFile}; + } + /** + * For unbucketed tables insert events are also stored in bucketN files but here N is + * the writer ID. We can trust that N matches info in {@link RecordIdentifier#getBucketProperty()} + * delta_x_y but it's not required since we can't trust N for delete_delta_x_x/bucketN. + * Thus we always have to take all files in a delete_delta. + * For regular read, any file that has (only) insert events is treated as base so + * {@link deltaDirectory} can only be delete_delta and so we take all files in it. + * For compacting, every split contains base/bN + delta(s)/bN + delete_delta(s){all buckets} for + * a given N. 
+ */ + if(deltaDirectory.getName().startsWith(AcidUtils.DELETE_DELTA_PREFIX)) { + //it's not wrong to take all delete events for bucketed tables but it's more efficient + //to only take those that belong to the 'bucket' assuming we trust the file name + //un-bucketed table - get all files + FileSystem fs = deltaDirectory.getFileSystem(conf); + FileStatus[] dataFiles = fs.listStatus(deltaDirectory, AcidUtils.bucketFileFilter); + Path[] deltaFiles = new Path[dataFiles.length]; + int i = 0; + for (FileStatus stat : dataFiles) { + deltaFiles[i++] = stat.getPath(); + }//todo: need a test where we actually have more than 1 file + return deltaFiles; + } + //if here it must be delta_x_y - insert events only, so we must be compacting + assert mergerOptions.isCompacting() : "Expected to be called as part of compaction"; + Path deltaFile = AcidUtils.createBucketFile(deltaDirectory, bucket); + return new Path[] {deltaFile}; + + } + @VisibleForTesting RecordIdentifier getMinKey() { return minKey; diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java index 429960b771..df768eb5ea 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java @@ -243,7 +243,7 @@ static StructObjectInspector createEventSchema(ObjectInspector rowInspector) { } if (options.getMinimumTransactionId() != options.getMaximumTransactionId() && !options.isWritingBase()){ - flushLengths = fs.create(OrcAcidUtils.getSideFile(this.path), true, 8, + flushLengths = fs.create(OrcAcidUtils.getSideFile(this.path), false, 8, options.getReporter()); flushLengths.writeLong(0); OrcInputFormat.SHIMS.hflush(flushLengths); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java index 8f807102dd..d2401d55d4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; @@ -530,6 +531,9 @@ public String toString() { * For every call to next(), it returns the next smallest record id in the file if available. * Internally, the next() buffers a row batch and maintains an index pointer, reading the * next batch when the previous batch is exhausted. + * + * For unbucketed tables this will currently return all delete events. Once we trust that + * the N in bucketN for "base" spit is reliable, all delete events not matching N can be skipped. */ static class DeleteReaderValue { private VectorizedRowBatch batch; @@ -538,9 +542,10 @@ public String toString() { private final int bucketForSplit; // The bucket value should be same for all the records. 
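[Illustration, not part of the patch] The comments above repeatedly distinguish the writer id encoded in ROW__ID's bucketProperty from the N in the bucket_N file name. A minimal sketch of how that id is recovered, using only the BucketCodec calls that checkBucketId() below already uses; the two constants are the ROW__ID.bucketid values asserted by the new TestTxnCommands3.testNoBuckets(), which per those assertions correspond to bucket_00000 and bucket_00001 (the demo class name is made up):

    import org.apache.hadoop.hive.ql.io.BucketCodec;

    public class BucketPropertyDecodeDemo {
      public static void main(String[] args) {
        // ROW__ID.bucketid values taken from the testNoBuckets() assertions in this patch
        int[] bucketProperties = {536870912, 536936448};
        for (int prop : bucketProperties) {
          // determineVersion() selects the codec from the version bits of the property;
          // decodeWriterId() extracts the writer (task) id, i.e. the N of bucket_0000N for
          // insert deltas. For delete_delta files of un-bucketed tables the decoded id and
          // the file name may disagree, which is why checkBucketId() skips the check there.
          int writerId = BucketCodec.determineVersion(prop).decodeWriterId(prop);
          System.out.println(prop + " -> writerId=" + writerId);
        }
      }
    }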
private final ValidTxnList validTxnList; private boolean isBucketPropertyRepeating; + private final boolean isBucketedTable; public DeleteReaderValue(Reader deleteDeltaReader, Reader.Options readerOptions, int bucket, - ValidTxnList validTxnList) throws IOException { + ValidTxnList validTxnList, boolean isBucketedTable) throws IOException { this.recordReader = deleteDeltaReader.rowsOptions(readerOptions); this.bucketForSplit = bucket; this.batch = deleteDeltaReader.getSchema().createRowBatch(); @@ -549,6 +554,7 @@ public DeleteReaderValue(Reader deleteDeltaReader, Reader.Options readerOptions, } this.indexPtrInBatch = 0; this.validTxnList = validTxnList; + this.isBucketedTable = isBucketedTable; checkBucketId();//check 1st batch } @@ -615,6 +621,13 @@ private void checkBucketId() throws IOException { * either the split computation got messed up or we found some corrupted records. */ private void checkBucketId(int bucketPropertyFromRecord) throws IOException { + if(!isBucketedTable) { + /** + * in this case a file inside a delete_delta_x_y/bucketN may contain any value for + * bucketId in {@link RecordIdentifier#getBucketProperty()} + */ + return; + } int bucketIdFromRecord = BucketCodec.determineVersion(bucketPropertyFromRecord) .decodeWriterId(bucketPropertyFromRecord); if(bucketIdFromRecord != bucketForSplit) { @@ -686,14 +699,16 @@ public ColumnizedDeleteEventRegistry(JobConf conf, OrcSplit orcSplit, this.rowIds = null; this.compressedOtids = null; int maxEventsInMemory = HiveConf.getIntVar(conf, ConfVars.HIVE_TRANSACTIONAL_NUM_EVENTS_IN_MEMORY); + final boolean isBucketedTable = conf.getInt(hive_metastoreConstants.BUCKET_COUNT, 0) > 0; try { final Path[] deleteDeltaDirs = getDeleteDeltaDirsFromSplit(orcSplit); if (deleteDeltaDirs.length > 0) { int totalDeleteEventCount = 0; for (Path deleteDeltaDir : deleteDeltaDirs) { - Path deleteDeltaFile = AcidUtils.createBucketFile(deleteDeltaDir, bucket); - FileSystem fs = deleteDeltaFile.getFileSystem(conf); + FileSystem fs = deleteDeltaDir.getFileSystem(conf); + for(Path deleteDeltaFile : OrcRawRecordMerger.getDeltaFiles(deleteDeltaDir, bucket, conf, + new OrcRawRecordMerger.Options().isCompacting(false), isBucketedTable)) { // NOTE: Calling last flush length below is more for future-proofing when we have // streaming deletes. But currently we don't support streaming deletes, and this can // be removed if this becomes a performance issue. @@ -721,7 +736,7 @@ public ColumnizedDeleteEventRegistry(JobConf conf, OrcSplit orcSplit, throw new DeleteEventsOverflowMemoryException(); } DeleteReaderValue deleteReaderValue = new DeleteReaderValue(deleteDeltaReader, - readerOptions, bucket, validTxnList); + readerOptions, bucket, validTxnList, isBucketedTable); DeleteRecordKey deleteRecordKey = new DeleteRecordKey(); if (deleteReaderValue.next(deleteRecordKey)) { sortMerger.put(deleteRecordKey, deleteReaderValue); @@ -730,6 +745,7 @@ public ColumnizedDeleteEventRegistry(JobConf conf, OrcSplit orcSplit, } } } + } if (totalDeleteEventCount > 0) { // Initialize the rowId array when we have some delete events. 
rowIds = new long[totalDeleteEventCount]; diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index 76aa39f0cb..95224d2340 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -206,6 +206,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if(!VirtualColumn.ROWID.getTypeInfo().equals(ci.getType())) { throw new IllegalStateException("expected 1st column to be ROW__ID but got wrong type: " + ci.toString()); } + //todo: not sure this is correct... I don't think is gets wrapped in UDFToInteger.... bucketColumns.add(new ExprNodeColumnDesc(ci)); } else { if (!destTable.getSortCols().isEmpty()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 4faec05bb0..2f715e31b5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -7374,9 +7374,6 @@ String fixCtasColumnName(String colName) { } // Check constraints on acid tables. This includes - // * no insert overwrites - // * no use of vectorization - // * turns off reduce deduplication optimization, as that sometimes breaks acid // * Check that the table is bucketed // * Check that the table is not sorted // This method assumes you have already decided that this is an Acid write. Don't call it if @@ -7394,9 +7391,6 @@ These props are now enabled elsewhere (see commit diffs). It would be better in */ conf.set(AcidUtils.CONF_ACID_KEY, "true"); - if (table.getNumBuckets() < 1) { - throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, table.getTableName()); - } if (table.getSortCols() != null && table.getSortCols().size() > 0) { throw new SemanticException(ErrorMsg.ACID_NO_SORTED_BUCKETS, table.getTableName()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java index 5e2146ea0e..04ef7fc86a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java +++ ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.txn.CompactionInfo; import org.apache.hadoop.hive.metastore.txn.TxnStore; import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter; @@ -93,7 +94,7 @@ static final private String IS_MAJOR = "hive.compactor.is.major"; static final private String IS_COMPRESSED = "hive.compactor.is.compressed"; static final private String TABLE_PROPS = "hive.compactor.table.props"; - static final private String NUM_BUCKETS = "hive.compactor.num.buckets"; + static final private String NUM_BUCKETS = hive_metastoreConstants.BUCKET_COUNT; static final private String BASE_DIR = "hive.compactor.base.dir"; static final private String DELTA_DIRS = "hive.compactor.delta.dirs"; static final private String DIRS_TO_SEARCH = "hive.compactor.dirs.to.search"; diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java 
index bff9884aa1..a015b187f5 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java @@ -69,68 +69,17 @@ * Tests here are for multi-statement transactions (WIP) and those that don't need to * run with Acid 2.0 (see subclasses of TestTxnCommands2) */ -public class TestTxnCommands { +public class TestTxnCommands extends TestTxnCommandsBase { static final private Logger LOG = LoggerFactory.getLogger(TestTxnCommands.class); private static final String TEST_DATA_DIR = new File(System.getProperty("java.io.tmpdir") + File.separator + TestTxnCommands.class.getCanonicalName() + "-" + System.currentTimeMillis() ).getPath().replaceAll("\\\\", "/"); - private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - //bucket count for test tables; set it to 1 for easier debugging - private static int BUCKET_COUNT = 2; - @Rule - public TestName testName = new TestName(); - private HiveConf hiveConf; - private Driver d; - private static enum Table { - ACIDTBL("acidTbl"), - ACIDTBLPART("acidTblPart"), - ACIDTBL2("acidTbl2"), - NONACIDORCTBL("nonAcidOrcTbl"), - NONACIDORCTBL2("nonAcidOrcTbl2"); - - private final String name; - @Override - public String toString() { - return name; - } - Table(String name) { - this.name = name; - } + @Override + String getTestDataDir() { + return TEST_DATA_DIR; } - @Before - public void setUp() throws Exception { - tearDown(); - hiveConf = new HiveConf(this.getClass()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); - hiveConf.setVar(HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict"); - hiveConf.setVar(HiveConf.ConfVars.HIVEINPUTFORMAT, HiveInputFormat.class.getName()); - hiveConf - .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, - "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); - hiveConf.setBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK, true); - TxnDbUtil.setConfValues(hiveConf); - TxnDbUtil.prepDb(); - File f = new File(TEST_WAREHOUSE_DIR); - if (f.exists()) { - FileUtil.fullyDelete(f); - } - if (!(new File(TEST_WAREHOUSE_DIR).mkdirs())) { - throw new RuntimeException("Could not create " + TEST_WAREHOUSE_DIR); - } - SessionState.start(new SessionState(hiveConf)); - d = new Driver(hiveConf); - d.setMaxRows(10000); - dropTables(); - runStatementOnDriver("create table " + Table.ACIDTBL + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); - runStatementOnDriver("create table " + Table.ACIDTBLPART + "(a int, b int) partitioned by (p string) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); - runStatementOnDriver("create table " + Table.NONACIDORCTBL + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='false')"); - runStatementOnDriver("create table " + Table.NONACIDORCTBL2 + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='false')"); - runStatementOnDriver("create temporary table " + Table.ACIDTBL2 + "(a int, b int, c int) clustered by (c) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); - } private void dropTables() throws Exception { for(Table t : Table.values()) { 
runStatementOnDriver("drop table if exists " + t); @@ -150,7 +99,7 @@ public void tearDown() throws Exception { FileUtils.deleteDirectory(new File(TEST_DATA_DIR)); } } - @Test + @Test//todo: what is this for? public void testInsertOverwrite() throws Exception { runStatementOnDriver("insert overwrite table " + Table.NONACIDORCTBL + " select a,b from " + Table.NONACIDORCTBL2); runStatementOnDriver("create table " + Table.NONACIDORCTBL2 + "3(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='false')"); @@ -172,7 +121,7 @@ private void dumpBucketData(Table table, long txnId, int stmtId, int bucketNum) if(true) { return; } - Path bucket = AcidUtils.createBucketFile(new Path(new Path(TEST_WAREHOUSE_DIR, table.toString().toLowerCase()), AcidUtils.deltaSubdir(txnId, txnId, stmtId)), bucketNum); + Path bucket = AcidUtils.createBucketFile(new Path(new Path(getWarehouseDir(), table.toString().toLowerCase()), AcidUtils.deltaSubdir(txnId, txnId, stmtId)), bucketNum); FileOutputStream delta = new FileOutputStream(testName.getMethodName() + "_" + bucket.getParent().getName() + "_" + bucket.getName()); // try { // FileDump.printJsonData(hiveConf, bucket.toString(), delta); @@ -490,33 +439,6 @@ private static void pause(int timeMillis) { } } - /** - * takes raw data and turns it into a string as if from Driver.getResults() - * sorts rows in dictionary order - */ - private List stringifyValues(int[][] rowsIn) { - return TestTxnCommands2.stringifyValues(rowsIn); - } - private String makeValuesClause(int[][] rows) { - return TestTxnCommands2.makeValuesClause(rows); - } - - private List runStatementOnDriver(String stmt) throws Exception { - CommandProcessorResponse cpr = d.run(stmt); - if(cpr.getResponseCode() != 0) { - throw new RuntimeException(stmt + " failed: " + cpr); - } - List rs = new ArrayList(); - d.getResults(rs); - return rs; - } - private CommandProcessorResponse runStatementOnDriverNegative(String stmt) throws Exception { - CommandProcessorResponse cpr = d.run(stmt); - if(cpr.getResponseCode() != 0) { - return cpr; - } - throw new RuntimeException("Didn't get expected failure!"); - } @Test public void exchangePartition() throws Exception { @@ -940,4 +862,4 @@ public void testMoreBucketsThanReducers2() throws Exception { int[][] expected = {{0, -1},{0, -1}, {1, -1}, {1, -1}, {2, -1}, {2, -1}, {3, -1}, {3, -1}}; Assert.assertEquals(stringifyValues(expected), r); } -} +} \ No newline at end of file diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java index 0e0fca313e..5d53be2d7a 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java @@ -304,8 +304,8 @@ public void testOriginalFileReaderWhenNonAcidConvertedToAcid() throws Exception // 1. Insert five rows to Non-ACID table. runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2),(3,4),(5,6),(7,8),(9,10)"); - // 2. Convert NONACIDORCTBL to ACID table. - runStatementOnDriver("alter table " + Table.NONACIDORCTBL + " SET TBLPROPERTIES ('transactional'='true')"); + // 2. Convert NONACIDORCTBL to ACID table. 
//todo: remove trans_prop after HIVE-17089 + runStatementOnDriver("alter table " + Table.NONACIDORCTBL + " SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')"); runStatementOnDriver("update " + Table.NONACIDORCTBL + " set b = b*2 where b in (4,10)"); runStatementOnDriver("delete from " + Table.NONACIDORCTBL + " where a = 7"); @@ -341,8 +341,8 @@ public void testNonAcidToAcidConversion02() throws Exception { //create 1 row in a file 000001_0_copy2 (and empty 000000_0_copy2?) runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,6)"); - //convert the table to Acid - runStatementOnDriver("alter table " + Table.NONACIDORCTBL + " SET TBLPROPERTIES ('transactional'='true')"); + //convert the table to Acid //todo: remove trans_prop after HIVE-17089 + runStatementOnDriver("alter table " + Table.NONACIDORCTBL + " SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')"); List rs1 = runStatementOnDriver("describe "+ Table.NONACIDORCTBL); //create a some of delta directories runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(0,15),(1,16)"); diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java new file mode 100644 index 0000000000..793c4f565b --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java @@ -0,0 +1,161 @@ +package org.apache.hadoop.hive.ql; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public class TestTxnCommands3 extends TestTxnCommandsBase{ + static final private Logger LOG = LoggerFactory.getLogger(TestTxnCommands3.class); + private static final String TEST_DATA_DIR = new File(System.getProperty("java.io.tmpdir") + + File.separator + TestTxnCommands3.class.getCanonicalName() + + "-" + System.currentTimeMillis() + ).getPath().replaceAll("\\\\", "/"); + @Override + String getTestDataDir() { + return TEST_DATA_DIR; + } + @Override + @Before + public void setUp() throws Exception { + setUpInternal(); + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); + } + @Test + public void testNoBucketsTableCreate() throws Exception { + CommandProcessorResponse cpr = runStatementOnDriverNegative("create table nobuckets" + + "(c1 integer, c2 integer, c3 integer) stored as orc tblproperties('transactional'='true')"); + Assert.assertTrue(cpr.getErrorMessage().contains("The table must be bucketed and stored using")); + } + + /** + * Tests that Acid can work with un-bucketed tables. + * todo: can the same test be run on Tez w/o copy-paste of the code? maybe make another test with + * more data and use https://github.com/apache/hive/blob/master/ql/src/test/queries/clientpositive/orc_merge3.q#L25 + * to record file names in .q.out file? 
+ * @throws Exception + */ + @Test + public void testNoBuckets() throws Exception { + int[][] sourceVals1 = {{0,0,0},{3,3,3}}; + int[][] sourceVals2 = {{1,1,1},{2,2,2}}; + runStatementOnDriver("create table tmp (c1 integer, c2 integer, c3 integer) stored as orc"); + runStatementOnDriver("insert into tmp " + makeValuesClause(sourceVals1)); + runStatementOnDriver("insert into tmp " + makeValuesClause(sourceVals2)); + runStatementOnDriver("create table nobuckets (c1 integer, c2 integer, c3 integer) stored " + + "as orc tblproperties('transactional'='true', 'transactional_properties'='default')"); + String stmt = "insert into nobuckets select * from tmp"; + runStatementOnDriver(stmt); + List rs = runStatementOnDriver( + "select ROW__ID, c1, c2, c3, INPUT__FILE__NAME from nobuckets order by ROW__ID"); + Assert.assertEquals("", 4, rs.size()); + LOG.warn("after insert"); + for(String s : rs) { + LOG.warn(s); + } + /**the insert creates 2 output files (presumably because there are 2 input files) + * The number in the file name is writerId. This is the number encoded in ROW__ID.bucketId - + * see {@link org.apache.hadoop.hive.ql.io.BucketCodec}*/ + Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t0\t")); + Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nobuckets/delta_0000015_0000015_0000/bucket_00000")); + Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3\t")); + Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nobuckets/delta_0000015_0000015_0000/bucket_00000")); + Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"transactionid\":15,\"bucketid\":536936448,\"rowid\":0}\t1\t1\t1\t")); + Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nobuckets/delta_0000015_0000015_0000/bucket_00001")); + Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"transactionid\":15,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2\t")); + Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nobuckets/delta_0000015_0000015_0000/bucket_00001")); + /*RS for update seems to spray randomly... is that OK? maybe as long as all resultant files have different names... will they? + Assuming we name them based on taskId, we should create bucketX and bucketY. + we delete events can be written to bucketX file it could be useful for filter delete for a split by file name since the insert + events seem to be written to a proper bucketX file. In fact this may reduce the number of changes elsewhere like compactor... maybe + But this limits the parallelism - what is worse, you don't know what the parallelism should be until you have a list of all the + input files since bucket count is no longer a metadata property. Also, with late Update split, the file name has already been determined + from taskId so the Insert part won't end up matching the bucketX property necessarily. + With early Update split, the Insert can still be an insert - i.e. go to appropriate bucketX. But deletes will still go wherever (random shuffle) + unless you know all the bucketX files to be read - may not be worth the trouble. + * 2nd: something in FS fails. 
ArrayIndexOutOfBoundsException: 1 at FileSinkOperator.process(FileSinkOperator.java:779)*/ + runStatementOnDriver("update nobuckets set c3 = 17 where c3 in(0,1)"); + rs = runStatementOnDriver("select ROW__ID, c1, c2, c3, INPUT__FILE__NAME from nobuckets order by INPUT__FILE__NAME, ROW__ID"); + LOG.warn("after update"); + for(String s : rs) { + LOG.warn(s); + } + Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3\t")); + Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nobuckets/delta_0000015_0000015_0000/bucket_00000")); + Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"transactionid\":15,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2\t")); + Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nobuckets/delta_0000015_0000015_0000/bucket_00001")); + //so update has 1 writer which creates bucket0 where both new rows land + Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t17\t")); + Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nobuckets/delta_0000017_0000017_0000/bucket_00000")); + Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t17\t")); + Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nobuckets/delta_0000017_0000017_0000/bucket_00000")); + + Set expectedFiles = new HashSet<>(); + //both delete events land in a single bucket0. Each has a different ROW__ID.bucketId value (even writerId in it is different) + expectedFiles.add("ts/delete_delta_0000017_0000017_0000/bucket_00000"); + expectedFiles.add("nobuckets/delta_0000015_0000015_0000/bucket_00000"); + expectedFiles.add("nobuckets/delta_0000015_0000015_0000/bucket_00001"); + expectedFiles.add("nobuckets/delta_0000017_0000017_0000/bucket_00000"); + //check that we get the right files on disk + assertExpectedFileSet(expectedFiles, getWarehouseDir() + "/nobuckets"); + //todo: it would be nice to check the contents of the files... 
could use orc.FileDump - it has + // methods to print to a supplied stream but those are package private + + runStatementOnDriver("alter table nobuckets compact 'major'"); + TestTxnCommands2.runWorker(hiveConf); + rs = runStatementOnDriver("select ROW__ID, c1, c2, c3, INPUT__FILE__NAME from nobuckets order by INPUT__FILE__NAME, ROW__ID"); + LOG.warn("after major compact"); + for(String s : rs) { + LOG.warn(s); + } + /* +├── base_0000017 +│   ├── bucket_00000 +│   └── bucket_00001 +├── delete_delta_0000017_0000017_0000 +│   └── bucket_00000 +├── delta_0000015_0000015_0000 +│   ├── bucket_00000 +│   └── bucket_00001 +└── delta_0000017_0000017_0000 + └── bucket_00000 + */ + Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3\t")); + Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nobuckets/base_0000017/bucket_00000")); + Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t17\t")); + Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nobuckets/base_0000017/bucket_00000")); + Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t17\t")); + Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nobuckets/base_0000017/bucket_00000")); + Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"transactionid\":15,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2\t")); + Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nobuckets/base_0000017/bucket_00001")); + + expectedFiles.clear(); + expectedFiles.add("delete_delta_0000017_0000017_0000/bucket_00000"); + expectedFiles.add("uckets/delta_0000015_0000015_0000/bucket_00000"); + expectedFiles.add("uckets/delta_0000015_0000015_0000/bucket_00001"); + expectedFiles.add("uckets/delta_0000017_0000017_0000/bucket_00000"); + expectedFiles.add("/warehouse/nobuckets/base_0000017/bucket_00000"); + expectedFiles.add("/warehouse/nobuckets/base_0000017/bucket_00001"); + assertExpectedFileSet(expectedFiles, getWarehouseDir() + "/nobuckets"); + + TestTxnCommands2.runCleaner(hiveConf); + rs = runStatementOnDriver("select c1, c2, c3 from nobuckets order by c1, c2, c3"); + int[][] result = {{0,0,17},{1,1,17},{2,2,2},{3,3,3}}; + Assert.assertEquals("Unexpected result after clean", stringifyValues(result), rs); + + expectedFiles.clear(); + expectedFiles.add("nobuckets/base_0000017/bucket_00000"); + expectedFiles.add("nobuckets/base_0000017/bucket_00001"); + assertExpectedFileSet(expectedFiles, getWarehouseDir() + "/nobuckets"); + } + //todo: try Insert with union all +} + diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsBase.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsBase.java new file mode 100644 index 0000000000..b0e1b1ab7f --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsBase.java @@ -0,0 +1,160 @@ +package org.apache.hadoop.hive.ql; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.txn.TxnDbUtil; +import org.apache.hadoop.hive.ql.io.HiveInputFormat; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import 
org.junit.Rule; +import org.junit.rules.TestName; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public abstract class TestTxnCommandsBase { + //bucket count for test tables; set it to 1 for easier debugging + final static int BUCKET_COUNT = 2; + @Rule + public TestName testName = new TestName(); + HiveConf hiveConf; + Driver d; + enum Table { + ACIDTBL("acidTbl"), + ACIDTBLPART("acidTblPart"), + ACIDTBL2("acidTbl2"), + NONACIDORCTBL("nonAcidOrcTbl"), + NONACIDORCTBL2("nonAcidOrcTbl2"); + + final String name; + @Override + public String toString() { + return name; + } + Table(String name) { + this.name = name; + } + } + + @Before + public void setUp() throws Exception { + setUpInternal(); + } + void setUpInternal() throws Exception { + tearDown(); + hiveConf = new HiveConf(this.getClass()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, getWarehouseDir()); + hiveConf.setVar(HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict"); + hiveConf.setVar(HiveConf.ConfVars.HIVEINPUTFORMAT, HiveInputFormat.class.getName()); + hiveConf + .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, + "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); + hiveConf.setBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK, true); + TxnDbUtil.setConfValues(hiveConf); + TxnDbUtil.prepDb(); + File f = new File(getWarehouseDir()); + if (f.exists()) { + FileUtil.fullyDelete(f); + } + if (!(new File(getWarehouseDir()).mkdirs())) { + throw new RuntimeException("Could not create " + getWarehouseDir()); + } + SessionState.start(new SessionState(hiveConf)); + d = new Driver(hiveConf); + d.setMaxRows(10000); + dropTables(); + runStatementOnDriver("create table " + TestTxnCommandsBase.Table.ACIDTBL + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); + runStatementOnDriver("create table " + TestTxnCommandsBase.Table.ACIDTBLPART + "(a int, b int) partitioned by (p string) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); + runStatementOnDriver("create table " + TestTxnCommandsBase.Table.NONACIDORCTBL + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='false')"); + runStatementOnDriver("create table " + TestTxnCommandsBase.Table.NONACIDORCTBL2 + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='false')"); + runStatementOnDriver("create temporary table " + TestTxnCommandsBase.Table.ACIDTBL2 + "(a int, b int, c int) clustered by (c) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); + } + private void dropTables() throws Exception { + for(TestTxnCommandsBase.Table t : TestTxnCommandsBase.Table.values()) { + runStatementOnDriver("drop table if exists " + t); + } + } + @After + public void tearDown() throws Exception { + try { + if (d != null) { + dropTables(); + d.destroy(); + d.close(); + d = null; + } + } finally { + TxnDbUtil.cleanDb(); + FileUtils.deleteDirectory(new File(getTestDataDir())); + } + } + String getWarehouseDir() { + return getTestDataDir() + "/warehouse"; + } + abstract String getTestDataDir(); + /** + * takes raw data and turns it into a string as if from 
Driver.getResults() + * sorts rows in dictionary order + */ + List<String> stringifyValues(int[][] rowsIn) { + return TestTxnCommands2.stringifyValues(rowsIn); + } + String makeValuesClause(int[][] rows) { + return TestTxnCommands2.makeValuesClause(rows); + } + + List<String> runStatementOnDriver(String stmt) throws Exception { + CommandProcessorResponse cpr = d.run(stmt); + if(cpr.getResponseCode() != 0) { + throw new RuntimeException(stmt + " failed: " + cpr); + } + List<String> rs = new ArrayList<String>(); + d.getResults(rs); + return rs; + } + CommandProcessorResponse runStatementOnDriverNegative(String stmt) throws Exception { + CommandProcessorResponse cpr = d.run(stmt); + if(cpr.getResponseCode() != 0) { + return cpr; + } + throw new RuntimeException("Didn't get expected failure!"); + } + /** + * Will assert that actual files match expected. + * @param expectedFiles - suffixes of expected Paths. Must all be the same length + * @param rootPath - table or partition root where to start looking for actual files, recursively + */ + void assertExpectedFileSet(Set<String> expectedFiles, String rootPath) throws Exception { + int suffixLength = 0; + for(String s : expectedFiles) { + if(suffixLength > 0) { + assert suffixLength == s.length() : "all entries must be the same length. current: " + s; + } + suffixLength = s.length(); + } + FileSystem fs = FileSystem.get(hiveConf); + Set<String> actualFiles = new HashSet<>(); + RemoteIterator<LocatedFileStatus> remoteIterator = fs.listFiles(new Path(rootPath), true); + while (remoteIterator.hasNext()) { + LocatedFileStatus lfs = remoteIterator.next(); + if(!lfs.isDirectory() && org.apache.hadoop.hive.common.FileUtils.HIDDEN_FILES_PATH_FILTER.accept(lfs.getPath())) { + String p = lfs.getPath().toString(); + actualFiles.add(p.substring(p.length() - suffixLength, p.length())); + } + } + Assert.assertEquals("Unexpected file list", expectedFiles, actualFiles); + } +} diff --git ql/src/test/queries/clientpositive/acid_no_buckets.q ql/src/test/queries/clientpositive/acid_no_buckets.q new file mode 100644 index 0000000000..c2f713e9b7 --- /dev/null +++ ql/src/test/queries/clientpositive/acid_no_buckets.q @@ -0,0 +1,210 @@ +--this has 4 groups of tests +--Acid tables w/o bucketing +--the tests with bucketing (make sure we get the same results) +--same tests with and w/o vectorization + +set hive.mapred.mode=nonstrict; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.vectorized.execution.enabled=false; +set hive.explain.user=false; +set hive.merge.cardinality.check=true; + +drop table if exists srcpart_acid; +CREATE TABLE srcpart_acid (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default'); +insert into srcpart_acid PARTITION (ds, hr) select * from srcpart; + +--2 rows for 413, 1 row for 43, 2 for 213, 1 for 44 in kv1.txt (in each partition) +select ds, hr, key, value from srcpart_acid where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer); + +analyze table srcpart_acid PARTITION(ds, hr) compute statistics; +analyze table srcpart_acid PARTITION(ds, hr) compute statistics for columns; +explain update srcpart_acid set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'; +update srcpart_acid set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'; +select ds, hr, key, value from srcpart_acid where value like '%updated' order
by ds, hr, cast(key as integer); + +insert into srcpart_acid PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003'); +select ds, hr, key, value from srcpart_acid where cast(key as integer) > 1000 order by ds, hr, cast(key as integer); + +analyze table srcpart_acid PARTITION(ds, hr) compute statistics; +analyze table srcpart_acid PARTITION(ds, hr) compute statistics for columns; +explain delete from srcpart_acid where key in( '1001', '213', '43'); +--delete some rows from initial load, some that were updated and some that were inserted +delete from srcpart_acid where key in( '1001', '213', '43'); + +--make sure we deleted everything that should've been deleted +select count(*) from srcpart_acid where key in( '1001', '213', '43'); +--make sure nothing extra was deleted (2000 + 3 (insert) - 4 - 1 - 8 = 1990) +select count(*) from srcpart_acid; + +--todo: should really have a way to run compactor here.... + +--update should match 1 rows in 1 partition +--delete should drop everything from 1 partition +--insert should do nothing +merge into srcpart_acid t using (select distinct ds, hr, key, value from srcpart_acid) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there'); + +--check results +--should be 0 +select count(*) from srcpart_acid where ds='2008-04-08' and hr=='12'; +--should be 1 rows +select ds, hr, key, value from srcpart_acid where value like '%updated by merge'; +--should be 0 +select count(*) from srcpart_acid where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there'; +drop table if exists srcpart_acid; + + +drop table if exists srcpart_acidb; +CREATE TABLE srcpart_acidb (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) CLUSTERED BY(key) INTO 2 BUCKETS stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default'); +insert into srcpart_acidb PARTITION (ds, hr) select * from srcpart; + +--2 rows for 413, 1 row for 43, 2 for 213, 2 for 12 in kv1.txt (in each partition) +select ds, hr, key, value from srcpart_acidb where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer); + +analyze table srcpart_acidb PARTITION(ds, hr) compute statistics; +analyze table srcpart_acidb PARTITION(ds, hr) compute statistics for columns; +explain update srcpart_acidb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'; +update srcpart_acidb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'; +select ds, hr, key, value from srcpart_acidb where value like '%updated' order by ds, hr, cast(key as integer); + +insert into srcpart_acidb PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003'); +select ds, hr, key, value from srcpart_acidb where cast(key as integer) > 1000 order by ds, hr, cast(key as integer); + +analyze table srcpart_acidb PARTITION(ds, hr) compute statistics; +analyze table srcpart_acidb PARTITION(ds, hr) compute statistics for columns; +explain delete from srcpart_acidb where key in( '1001', '213', '43'); +--delete some rows from initial load, some that were updated and some that were inserted +delete from srcpart_acidb where key in( '1001', '213', '43'); + +--make sure we 
deleted everything that should've been deleted +select count(*) from srcpart_acidb where key in( '1001', '213', '43'); +--make sure nothing extra was deleted (2000 + 3 (insert) - 4 - 1 - 8 = 1990) +select count(*) from srcpart_acidb; + + +--todo: should really have a way to run compactor here.... + +--update should match 1 rows in 1 partition +--delete should drop everything from 1 partition +--insert should do nothing +merge into srcpart_acidb t using (select distinct ds, hr, key, value from srcpart_acidb) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there'); + +--check results +--should be 0 +select count(*) from srcpart_acidb where ds='2008-04-08' and hr=='12'; +--should be 1 rows +select ds, hr, key, value from srcpart_acidb where value like '%updated by merge'; +--should be 0 +select count(*) from srcpart_acidb where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there'; +drop table if exists srcpart_acidb; + + + +--now same thing but vectorized +set hive.vectorized.execution.enabled=true; + +drop table if exists srcpart_acidv; +CREATE TABLE srcpart_acidv (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default'); +insert into srcpart_acidv PARTITION (ds, hr) select * from srcpart; + +--2 rows for 413, 21 row for 43, 2 for 213, 2 for 12 in kv1.txt (in each partition) +select ds, hr, key, value from srcpart_acidv where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer); + +analyze table srcpart_acidv PARTITION(ds, hr) compute statistics; +analyze table srcpart_acidv PARTITION(ds, hr) compute statistics for columns; +explain update srcpart_acidv set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'; +update srcpart_acidv set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'; +select ds, hr, key, value from srcpart_acidv where value like '%updated' order by ds, hr, cast(key as integer); + +insert into srcpart_acidv PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003'); +select ds, hr, key, value from srcpart_acidv where cast(key as integer) > 1000 order by ds, hr, cast(key as integer); + +analyze table srcpart_acidv PARTITION(ds, hr) compute statistics; +analyze table srcpart_acidv PARTITION(ds, hr) compute statistics for columns; +explain delete from srcpart_acidv where key in( '1001', '213', '43'); +--delete some rows from initial load, some that were updated and some that were inserted +delete from srcpart_acidv where key in( '1001', '213', '43'); + +--make sure we deleted everything that should've been deleted +select count(*) from srcpart_acidv where key in( '1001', '213', '43'); +--make sure nothing extra was deleted (2000 + 3 - 4 - 1 - 8 = 1990) +select count(*) from srcpart_acidv; + +--todo: should really have a way to run compactor here.... 
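[Illustration, not part of the patch] The todo above cannot be addressed from a .q file, but the JUnit side of this patch already runs the compactor inline. A minimal sketch of that pattern, reusing the TestTxnCommands2.runWorker/runCleaner and runStatementOnDriver helpers shown earlier in this diff (the method and table names here are made up):

    // inside a TestTxnCommandsBase subclass, mirroring TestTxnCommands3.testNoBuckets()
    @Test
    public void compactAfterDeletes() throws Exception {
      runStatementOnDriver("alter table srcpart_acid compact 'major'"); // queue a major compaction
      TestTxnCommands2.runWorker(hiveConf);   // run the compactor worker inline, producing base_N
      TestTxnCommands2.runCleaner(hiveConf);  // drop the now-obsolete delta/delete_delta dirs
      List<String> rs = runStatementOnDriver("select count(*) from srcpart_acid");
      Assert.assertEquals("1990", rs.get(0)); // row count must survive compaction
    }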
+ +--update should match 1 row in 1 partition +--delete should drop everything from 1 partition +--insert should do nothing +merge into srcpart_acidv t using (select distinct ds, hr, key, value from srcpart_acidv) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there'); + +--check results +--should be 0 +select count(*) from srcpart_acidv where ds='2008-04-08' and hr=='12'; +--should be 1 row +select ds, hr, key, value from srcpart_acidv where value like '%updated by merge'; +--should be 0 +select count(*) from srcpart_acidv where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there'; +drop table if exists srcpart_acidv; + + + +drop table if exists srcpart_acidvb; +CREATE TABLE srcpart_acidvb (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) CLUSTERED BY(key) INTO 2 BUCKETS stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default'); +insert into srcpart_acidvb PARTITION (ds, hr) select * from srcpart; + +--2 rows for 413, 1 row for 43, 2 for 213, 2 for 12 in kv1.txt (in each partition) +select ds, hr, key, value from srcpart_acidvb where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer); + +analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics; +analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics for columns; +explain update srcpart_acidvb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'; +update srcpart_acidvb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'; +select ds, hr, key, value from srcpart_acidvb where value like '%updated' order by ds, hr, cast(key as integer); + +insert into srcpart_acidvb PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003'); +select ds, hr, key, value from srcpart_acidvb where cast(key as integer) > 1000 order by ds, hr, cast(key as integer); + +analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics; +analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics for columns; +explain delete from srcpart_acidvb where key in( '1001', '213', '43'); +--delete some rows from initial load, some that were updated and some that were inserted +delete from srcpart_acidvb where key in( '1001', '213', '43'); + +--make sure we deleted everything that should've been deleted +select count(*) from srcpart_acidvb where key in( '1001', '213', '43'); +--make sure nothing extra was deleted (2000 + 3 (insert) - 4 - 1 - 8 = 1990) +select count(*) from srcpart_acidvb; + + +--todo: should really have a way to run compactor here.... 
+ +--update should match 1 row in 1 partition +--delete should drop everything from 1 partition +--insert should do nothing +merge into srcpart_acidvb t using (select distinct ds, hr, key, value from srcpart_acidvb) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there'); + +--check results +--should be 0 +select count(*) from srcpart_acidvb where ds='2008-04-08' and hr=='12'; +--should be 1 row +select ds, hr, key, value from srcpart_acidvb where value like '%updated by merge'; +--should be 0 +select count(*) from srcpart_acidvb where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there'; +drop table if exists srcpart_acidvb; diff --git ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out new file mode 100644 index 0000000000..34dd4877e8 --- /dev/null +++ ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out @@ -0,0 +1,1976 @@ +PREHOOK: query: drop table if exists srcpart_acid +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists srcpart_acid +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE srcpart_acid (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_acid +POSTHOOK: query: CREATE TABLE srcpart_acid (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_acid +PREHOOK: query: insert into srcpart_acid PARTITION (ds, hr) select * from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acid +POSTHOOK: query: insert into srcpart_acid PARTITION (ds, hr) select * from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] 
+POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select ds, hr, key, value from srcpart_acid where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acid where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +2008-04-08 11 43 val_43 +2008-04-08 11 413 val_413 +2008-04-08 11 413 val_413 +2008-04-09 11 43 val_43 +2008-04-09 11 413 val_413 +2008-04-09 11 413 val_413 +PREHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Output: default@srcpart_acid +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Output: default@srcpart_acid +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +PREHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acid +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acid +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: 
default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: explain update srcpart_acid set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +PREHOOK: type: QUERY +POSTHOOK: query: explain update srcpart_acid set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_acid + Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: (UDFToInteger(key)) IN (413, 43) (type: boolean) + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: ROW__ID (type: struct), key (type: string), concat(value, 'updated') (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acid + Write Type: UPDATE + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds + hr + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acid + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: update srcpart_acid set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: query: update srcpart_acid set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 
+POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: query: select ds, hr, key, value from srcpart_acid where value like '%updated' order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acid where value like '%updated' order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 43 val_43updated +2008-04-08 11 413 val_413updated +2008-04-08 11 413 val_413updated +2008-04-09 11 43 val_43updated +2008-04-09 11 413 val_413updated +2008-04-09 11 413 val_413updated +PREHOOK: query: insert into srcpart_acid PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003') +PREHOOK: type: QUERY +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: query: insert into srcpart_acid PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select ds, hr, key, value from srcpart_acid where cast(key as integer) > 1000 order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acid where cast(key as integer) > 1000 order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 1001 val1001 +2008-04-08 11 1002 val1002 +2008-04-08 11 1003 val1003 +PREHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Output: default@srcpart_acid +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 
+POSTHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Output: default@srcpart_acid +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +PREHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acid +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acid +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: explain delete from srcpart_acid where key in( '1001', '213', '43') +PREHOOK: type: QUERY +POSTHOOK: query: explain delete from srcpart_acid where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_acid + Statistics: Num rows: 2015 Data size: 916825 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: (key) IN ('1001', '213', '43') (type: boolean) + Statistics: Num rows: 20 Data size: 9100 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: ROW__ID (type: struct), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: string), _col2 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: 
false + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acid + Write Type: DELETE + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds + hr + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acid + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: delete from srcpart_acid where key in( '1001', '213', '43') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: query: delete from srcpart_acid where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +PREHOOK: query: select count(*) from srcpart_acid where key in( '1001', '213', '43') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acid where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +0 +PREHOOK: query: select count(*) from srcpart_acid +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### 
+1990 +PREHOOK: query: merge into srcpart_acid t using (select distinct ds, hr, key, value from srcpart_acid) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@srcpart_acid +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: query: merge into srcpart_acid t using (select distinct ds, hr, key, value from srcpart_acid) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(srcpart_acid)t.FieldSchema(name:ROW__ID, type:struct, comment:), (srcpart_acid)t.FieldSchema(name:ds, type:string, comment:null), (srcpart_acid)t.FieldSchema(name:hr, type:string, comment:null), ] +PREHOOK: query: select count(*) from srcpart_acid where ds='2008-04-08' and hr=='12' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acid where ds='2008-04-08' and hr=='12' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +0 +PREHOOK: query: select ds, hr, key, value from srcpart_acid where value like '%updated by merge' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: 
Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acid where value like '%updated by merge' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 44 val_44updated by merge +PREHOOK: query: select count(*) from srcpart_acid where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acid where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +#### A masked pattern was here #### +0 +PREHOOK: query: drop table if exists srcpart_acid +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart_acid +PREHOOK: Output: default@srcpart_acid +POSTHOOK: query: drop table if exists srcpart_acid +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Output: default@srcpart_acid +PREHOOK: query: drop table if exists srcpart_acidb +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists srcpart_acidb +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE srcpart_acidb (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) CLUSTERED BY(key) INTO 2 BUCKETS stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_acidb +POSTHOOK: query: CREATE TABLE srcpart_acidb (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) CLUSTERED BY(key) INTO 2 BUCKETS stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_acidb +PREHOOK: query: insert into srcpart_acidb PARTITION (ds, hr) select * from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidb +POSTHOOK: query: insert into srcpart_acidb PARTITION (ds, hr) select * from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: srcpart_acidb PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidb PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidb 
PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidb PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidb PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidb PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidb PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidb PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select ds, hr, key, value from srcpart_acidb where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidb where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +2008-04-08 11 43 val_43 +2008-04-08 11 413 val_413 +2008-04-08 11 413 val_413 +2008-04-09 11 43 val_43 +2008-04-09 11 413 val_413 +2008-04-09 11 413 val_413 +PREHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Output: default@srcpart_acidb +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Output: default@srcpart_acidb +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +PREHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidb +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 
+POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidb +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: explain update srcpart_acidb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +PREHOOK: type: QUERY +POSTHOOK: query: explain update srcpart_acidb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_acidb + Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: (UDFToInteger(key)) IN (413, 43) (type: boolean) + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: ROW__ID (type: struct), key (type: string), concat(value, 'updated') (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidb + Write Type: UPDATE + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds + hr + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidb + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: update srcpart_acidb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: 
default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: query: update srcpart_acidb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: query: select ds, hr, key, value from srcpart_acidb where value like '%updated' order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidb where value like '%updated' order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 43 val_43updated +2008-04-08 11 413 val_413updated +2008-04-08 11 413 val_413updated +2008-04-09 11 43 val_43updated +2008-04-09 11 413 val_413updated +2008-04-09 11 413 val_413updated +PREHOOK: query: insert into srcpart_acidb PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003') +PREHOOK: type: QUERY +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: query: insert into srcpart_acidb PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: srcpart_acidb PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: srcpart_acidb PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select ds, hr, key, value from srcpart_acidb where cast(key as integer) > 1000 order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidb where cast(key as integer) > 1000 order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 1001 val1001 +2008-04-08 11 1002 val1002 
+2008-04-08 11 1003 val1003 +PREHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Output: default@srcpart_acidb +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Output: default@srcpart_acidb +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +PREHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidb +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidb +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: explain delete from srcpart_acidb where key in( '1001', '213', '43') +PREHOOK: type: QUERY +POSTHOOK: query: explain delete from srcpart_acidb where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_acidb + Statistics: Num rows: 2015 Data size: 916825 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: (key) IN ('1001', '213', '43') (type: boolean) + Statistics: Num rows: 20 Data size: 9100 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: ROW__ID (type: struct), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + 
Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: string), _col2 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidb + Write Type: DELETE + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds + hr + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidb + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: delete from srcpart_acidb where key in( '1001', '213', '43') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: query: delete from srcpart_acidb where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +PREHOOK: query: select count(*) from srcpart_acidb where key in( '1001', '213', '43') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidb where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +0 +PREHOOK: query: select count(*) from srcpart_acidb +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: 
Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidb +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +1990 +PREHOOK: query: merge into srcpart_acidb t using (select distinct ds, hr, key, value from srcpart_acidb) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@srcpart_acidb +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: query: merge into srcpart_acidb t using (select distinct ds, hr, key, value from srcpart_acidb) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(srcpart_acidb)t.FieldSchema(name:ROW__ID, type:struct, comment:), (srcpart_acidb)t.FieldSchema(name:ds, type:string, comment:null), (srcpart_acidb)t.FieldSchema(name:hr, type:string, comment:null), ] +PREHOOK: query: select count(*) from srcpart_acidb where ds='2008-04-08' and hr=='12' +PREHOOK: type: QUERY +PREHOOK: Input: 
default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidb where ds='2008-04-08' and hr=='12' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +0 +PREHOOK: query: select ds, hr, key, value from srcpart_acidb where value like '%updated by merge' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidb where value like '%updated by merge' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 44 val_44updated by merge +PREHOOK: query: select count(*) from srcpart_acidb where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidb where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +#### A masked pattern was here #### +0 +PREHOOK: query: drop table if exists srcpart_acidb +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Output: default@srcpart_acidb +POSTHOOK: query: drop table if exists srcpart_acidb +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Output: default@srcpart_acidb +PREHOOK: query: drop table if exists srcpart_acidv +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists srcpart_acidv +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE srcpart_acidv (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_acidv +POSTHOOK: query: CREATE TABLE srcpart_acidv (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_acidv +PREHOOK: query: insert into srcpart_acidv PARTITION (ds, hr) select * from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidv +POSTHOOK: query: insert into srcpart_acidv PARTITION (ds, hr) select * from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: 
default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select ds, hr, key, value from srcpart_acidv where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidv where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +2008-04-08 11 43 val_43 +2008-04-08 11 413 val_413 +2008-04-08 11 413 val_413 +2008-04-09 11 43 val_43 +2008-04-09 11 413 val_413 +2008-04-09 11 413 val_413 +PREHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Output: default@srcpart_acidv +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Output: default@srcpart_acidv +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +PREHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Input: 
default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidv +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidv +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: explain update srcpart_acidv set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +PREHOOK: type: QUERY +POSTHOOK: query: explain update srcpart_acidv set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_acidv + Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: (UDFToInteger(key)) IN (413, 43) (type: boolean) + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: ROW__ID (type: struct), key (type: string), concat(value, 'updated') (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidv + Write Type: UPDATE + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: 
+ ds + hr + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidv + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: update srcpart_acidv set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: query: update srcpart_acidv set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: query: select ds, hr, key, value from srcpart_acidv where value like '%updated' order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidv where value like '%updated' order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 43 val_43updated +2008-04-08 11 413 val_413updated +2008-04-08 11 413 val_413updated +2008-04-09 11 43 val_43updated +2008-04-09 11 413 val_413updated +2008-04-09 11 413 val_413updated +PREHOOK: query: insert into srcpart_acidv PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003') +PREHOOK: type: QUERY +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: query: insert into srcpart_acidv PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select ds, hr, key, value from srcpart_acidv where cast(key as integer) > 1000 order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern 
was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidv where cast(key as integer) > 1000 order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 1001 val1001 +2008-04-08 11 1002 val1002 +2008-04-08 11 1003 val1003 +PREHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Output: default@srcpart_acidv +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Output: default@srcpart_acidv +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +PREHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidv +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidv +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: explain delete from srcpart_acidv where key in( '1001', '213', '43') +PREHOOK: type: QUERY +POSTHOOK: query: explain delete from srcpart_acidv where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_acidv + Statistics: Num rows: 2015 Data size: 916825 Basic stats: COMPLETE Column stats: PARTIAL 
+ Filter Operator + predicate: (key) IN ('1001', '213', '43') (type: boolean) + Statistics: Num rows: 20 Data size: 9100 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: ROW__ID (type: struct), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: string), _col2 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidv + Write Type: DELETE + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds + hr + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidv + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: delete from srcpart_acidv where key in( '1001', '213', '43') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: query: delete from srcpart_acidv where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +PREHOOK: query: select count(*) from srcpart_acidv where key in( '1001', '213', '43') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidv where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +0 +PREHOOK: query: select count(*) from srcpart_acidv +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidv +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +1990 +PREHOOK: query: merge into srcpart_acidv t using (select distinct ds, hr, key, value from srcpart_acidv) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@srcpart_acidv +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: query: merge into srcpart_acidv t using (select distinct ds, hr, key, value from srcpart_acidv) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Output: 
default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(srcpart_acidv)t.FieldSchema(name:ROW__ID, type:struct, comment:), (srcpart_acidv)t.FieldSchema(name:ds, type:string, comment:null), (srcpart_acidv)t.FieldSchema(name:hr, type:string, comment:null), ] +PREHOOK: query: select count(*) from srcpart_acidv where ds='2008-04-08' and hr=='12' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidv where ds='2008-04-08' and hr=='12' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +0 +PREHOOK: query: select ds, hr, key, value from srcpart_acidv where value like '%updated by merge' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidv where value like '%updated by merge' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 44 val_44updated by merge +PREHOOK: query: select count(*) from srcpart_acidv where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidv where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +#### A masked pattern was here #### +0 +PREHOOK: query: drop table if exists srcpart_acidv +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Output: default@srcpart_acidv +POSTHOOK: query: drop table if exists srcpart_acidv +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Output: default@srcpart_acidv +PREHOOK: query: drop table if exists srcpart_acidvb +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists srcpart_acidvb +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE srcpart_acidvb (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) CLUSTERED BY(key) INTO 2 BUCKETS stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_acidvb +POSTHOOK: query: CREATE TABLE srcpart_acidvb (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) CLUSTERED BY(key) INTO 2 BUCKETS stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_acidvb +PREHOOK: query: insert into srcpart_acidvb PARTITION (ds, hr) select * from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidvb +POSTHOOK: query: insert into srcpart_acidvb PARTITION (ds, hr) select * from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select ds, hr, key, value from srcpart_acidvb where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidvb where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +2008-04-08 11 43 val_43 +2008-04-08 11 413 val_413 +2008-04-08 11 413 val_413 +2008-04-09 11 43 val_43 +2008-04-09 11 413 val_413 +2008-04-09 11 413 val_413 +PREHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Output: default@srcpart_acidvb +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: 
Output: default@srcpart_acidvb +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +PREHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidvb +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidvb +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: explain update srcpart_acidvb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +PREHOOK: type: QUERY +POSTHOOK: query: explain update srcpart_acidvb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_acidvb + Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: (UDFToInteger(key)) IN (413, 43) (type: boolean) + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: ROW__ID (type: struct), key (type: string), concat(value, 'updated') (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string), 
VALUE._col2 (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidvb + Write Type: UPDATE + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds + hr + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidvb + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: update srcpart_acidvb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: query: update srcpart_acidvb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: query: select ds, hr, key, value from srcpart_acidvb where value like '%updated' order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidvb where value like '%updated' order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 43 val_43updated +2008-04-08 11 413 val_413updated +2008-04-08 11 413 val_413updated +2008-04-09 11 43 val_43updated +2008-04-09 11 413 val_413updated +2008-04-09 11 413 val_413updated +PREHOOK: query: insert into srcpart_acidvb PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003') +PREHOOK: type: QUERY +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: query: insert into srcpart_acidvb PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-08,hr=11).key SIMPLE 
[(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select ds, hr, key, value from srcpart_acidvb where cast(key as integer) > 1000 order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidvb where cast(key as integer) > 1000 order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 1001 val1001 +2008-04-08 11 1002 val1002 +2008-04-08 11 1003 val1003 +PREHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Output: default@srcpart_acidvb +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Output: default@srcpart_acidvb +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +PREHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidvb +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidvb +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 
+POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: explain delete from srcpart_acidvb where key in( '1001', '213', '43') +PREHOOK: type: QUERY +POSTHOOK: query: explain delete from srcpart_acidvb where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_acidvb + Statistics: Num rows: 2015 Data size: 916825 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: (key) IN ('1001', '213', '43') (type: boolean) + Statistics: Num rows: 20 Data size: 9100 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: ROW__ID (type: struct), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: string), _col2 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidvb + Write Type: DELETE + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds + hr + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidvb + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: delete from srcpart_acidvb where key in( '1001', '213', '43') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: query: delete from srcpart_acidvb where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Input: 
default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +PREHOOK: query: select count(*) from srcpart_acidvb where key in( '1001', '213', '43') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidvb where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +0 +PREHOOK: query: select count(*) from srcpart_acidvb +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidvb +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +1990 +PREHOOK: query: merge into srcpart_acidvb t using (select distinct ds, hr, key, value from srcpart_acidvb) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@srcpart_acidvb +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: query: merge into srcpart_acidvb t using (select distinct ds, hr, key, value from srcpart_acidvb) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and 
s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(srcpart_acidvb)t.FieldSchema(name:ROW__ID, type:struct, comment:), (srcpart_acidvb)t.FieldSchema(name:ds, type:string, comment:null), (srcpart_acidvb)t.FieldSchema(name:hr, type:string, comment:null), ] +PREHOOK: query: select count(*) from srcpart_acidvb where ds='2008-04-08' and hr=='12' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidvb where ds='2008-04-08' and hr=='12' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +0 +PREHOOK: query: select ds, hr, key, value from srcpart_acidvb where value like '%updated by merge' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidvb where value like '%updated by merge' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 44 val_44updated by merge +PREHOOK: query: select count(*) from srcpart_acidvb where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidvb where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +#### A masked pattern was here #### +0 +PREHOOK: query: drop table if exists srcpart_acidvb +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Output: default@srcpart_acidvb +POSTHOOK: query: drop table if exists srcpart_acidvb +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Output: default@srcpart_acidvb