diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 685b388d84..4f0aa924d2 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -456,7 +456,9 @@ minillap.query.files=acid_bucket_pruning.q,\ llap_stats.q,\ multi_count_distinct_null.q -minillaplocal.query.files=acid_globallimit.q,\ +minillaplocal.query.files=\ + acid_no_buckets.q, \ + acid_globallimit.q,\ acid_vectorization_missing_cols.q,\ alter_merge_stats_orc.q,\ auto_join30.q,\ diff --git metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java index 0f08f434e7..0afa2e3d03 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java @@ -105,6 +105,8 @@ private void handleAlterTableTransactionalProp(PreAlterTableEvent context) throw } if ("true".equalsIgnoreCase(transactionalValue)) { if (!conformToAcid(newTable)) { + //todo: fix this later - .q.out file noise + //throw new MetaException("The table must be (bucketed or default) and stored using an ACID compliant" + throw new MetaException("The table must be bucketed and stored using an ACID compliant" + " format (such as ORC)"); } @@ -196,6 +198,8 @@ private void handleCreateTableTransactionalProp(PreCreateTableEvent context) thr if ("true".equalsIgnoreCase(transactionalValue)) { if (!conformToAcid(newTable)) { + //todo: fix this later - .q.out file noise + //throw new MetaException("The table must be (bucketed or default) and stored using an ACID compliant" + throw new MetaException("The table must be bucketed and stored using an ACID compliant" + " format (such as ORC)"); } @@ -220,6 +224,11 @@ private void handleCreateTableTransactionalProp(PreCreateTableEvent context) thr private boolean conformToAcid(Table table) throws MetaException { StorageDescriptor sd = table.getSd(); if (sd.getBucketColsSize() < 1) { + if(DEFAULT_TRANSACTIONAL_PROPERTY.equalsIgnoreCase( + table.getParameters().get(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES))) { + //only acid 2.0 supports un-bucketed tables + return true; + } return false; } diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index 9c9d4e7897..7d8ea46cf2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -419,6 +419,8 @@ VALUES_TABLE_CONSTRUCTOR_NOT_SUPPORTED(10296, "Values clause with table constructor not yet supported"), ACID_OP_ON_NONACID_TABLE(10297, "Attempt to do update or delete on table {0} that does not use " + + //todo: .q.out file noise + // "an AcidOutputFormat or is (not bucketed or default)", true), "an AcidOutputFormat or is not bucketed", true), ACID_NO_SORTED_BUCKETS(10298, "ACID insert, update, delete not supported on tables that are " + "sorted, table {0}", true), diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 8999f6f4f0..b2c95e57de 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConfUtil; 
import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.io.AcidUtils; @@ -148,8 +149,6 @@ RecordWriter[] outWriters; RecordUpdater[] updaters; Stat stat; - int acidLastBucket = -1; - int acidFileOffset = -1; public FSPaths(Path specPath) { tmpPath = Utilities.toTempPath(specPath); @@ -287,6 +286,7 @@ public Stat getStat() { private transient int numFiles; protected transient boolean multiFileSpray; protected transient final Map bucketMap = new HashMap(); + private transient boolean isBucketed = false; private transient ObjectInspector[] partitionObjectInspectors; protected transient HivePartitioner prtner; @@ -347,6 +347,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { isNativeTable = !conf.getTableInfo().isNonNative(); isTemporary = conf.isTemporary(); multiFileSpray = conf.isMultiFileSpray(); + this.isBucketed = hconf.getInt(hive_metastoreConstants.BUCKET_COUNT, 0) > 0; totalFiles = conf.getTotalFiles(); numFiles = conf.getNumFiles(); dpCtx = conf.getDynPartCtx(); @@ -793,9 +794,28 @@ public void process(Object row, int tag) throws HiveException { * Hive.copyFiles() will make one of them bucket_N_copy_M in the final location. The * reset of acid (read path) doesn't know how to handle copy_N files except for 'original' * files (HIVE-16177)*/ + int writerId = -1; + if(!isBucketed) { + assert !multiFileSpray; + assert writerOffset == 0; + /*For un-bucketed tables, ROW__IDs with different 'bucketNum' values can be written to + * the same bucketN file. N in this case is writerId and there is no relationship + * between the file name and any property of the data in it. With acid 2.0 and late + * update split this will be true for Inserts (derived from update) as well. Once we + * move to early update split, then each FileSinkOperator will either see all Delete or + * all Insert events and thus Inserts will be written to bucketN file such that all + * ROW__ID.bucketId indeed contain writerId=N. + * todo: Should we name the file 00000 or something to make it less confusing? + * Since taskId is unique (at least per statementId and thus + * per [delete_]delta_x_y_stmtId/) there will not be any copy_N files. + * + * Oops: compactor relies on bucketN being correct with respect to data in the file for + * bucketed tables. What does it do for non bucketed tables?*/ + writerId = Integer.parseInt(Utilities.getTaskIdFromFilename(taskId)); + } fpaths.updaters[writerOffset] = HiveFileFormatUtils.getAcidRecordUpdater( - jc, conf.getTableInfo(), bucketNum, conf, fpaths.outPaths[writerOffset], - rowInspector, reporter, 0); + jc, conf.getTableInfo(), writerId >= 0 ? 
writerId : bucketNum, conf, + fpaths.outPaths[writerOffset], rowInspector, reporter, 0); if (LOG.isDebugEnabled()) { LOG.debug("Created updater for bucket number " + bucketNum + " using file " + fpaths.outPaths[writerOffset]); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index 1e33424e24..683e7ba62e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -481,6 +481,10 @@ public boolean isSplitUpdate() { public boolean isHashBasedMerge() { return (description & HASH_BASED_MERGE_BIT) > 0; } + public boolean isLegacy() { + return description == 0;//acid 1.0 + } + public int toInt() { return description; diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index de49fc84bb..93d6af0934 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -1004,7 +1004,7 @@ public String toString() { static class ACIDSplitStrategy implements SplitStrategy { Path dir; List deltas; - boolean[] covered; + boolean[] covered;//true means this bucket has a base (and maybe some deltas) int numBuckets; AcidOperationalProperties acidOperationalProperties; @@ -1034,8 +1034,16 @@ public ACIDSplitStrategy(Path dir, int numBuckets, List deltas, b // This happens in the case where a bucket just has deltas and no // base. if (!deltas.isEmpty()) { + if(numBuckets == 0) { + //todo: some UTs don't seem to set up numBuckets properly causing this to misfire + //only split update tables can be un-bucketed + //throw new IllegalStateException("no buckets and no base and not split update"); + //we already handled splitUpdate above, so for legacy tables w/o a base and no buckets + // everything must go to a single split - which is why we should not allow un-bucketed 1.0 tables + } for (int b = 0; b < numBuckets; ++b) { if (!covered[b]) { + //passing 'b' for 'length' here is a hack - it is used to pass bucket Id when there is no base for this bucket splits.add(new OrcSplit(dir, null, b, 0, new String[0], null, false, false, deltas, -1, -1)); } } @@ -1629,7 +1637,7 @@ private long computeProjectionSize(List fileTypes, pathFutures.add(ecs.submit(fileGenerator)); } - boolean isTransactionalTableScan = + boolean isTransactionalTableScan =//this never seems to be set correctly HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN); boolean isSchemaEvolution = HiveConf.getBoolVar(conf, ConfVars.HIVE_SCHEMA_EVOLUTION); TypeDescription readerSchema = diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java index 814782a503..0cc1429cfc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java @@ -22,6 +22,8 @@ import java.util.Map; import java.util.TreeMap; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.AcidOutputFormat; import org.apache.hadoop.hive.shims.HadoopShims; @@ -67,7 +69,7 @@ private final RecordIdentifier maxKey; // an extra value so that we can return it while reading ahead private OrcStruct extraValue; - + private final AcidUtils.AcidOperationalProperties
acidOperationalProperties; /** * A RecordIdentifier extended with the current transaction id. This is the * key of our merge sort with the originalTransaction, bucket, and rowId @@ -678,6 +680,7 @@ private KeyInterval discoverOriginalKeyBounds(Reader reader, int bucket, break; } } + //todo: what is "bucket" for unbucketed tables? 0? if (rowOffset > 0) { minKey = new RecordIdentifier(0, bucket, rowOffset - 1); } @@ -733,7 +736,6 @@ private KeyInterval discoverKeyBounds(Reader reader, */ static Reader.Options createEventOptions(Reader.Options options) { Reader.Options result = options.clone(); - //result.range(options.getOffset(), Long.MAX_VALUE);WTF? result.include(options.getInclude()); // slide the column names down by 6 for the name array @@ -813,6 +815,34 @@ Path getBucketPath() { this.offset = options.getOffset(); this.length = options.getLength(); this.validTxnList = validTxnList; + /** + * @since Hive 3.0 + * With split update (HIVE-14035) we have base/, delta/ and delete_delta/ - the latter only + * has Delete events and the others only have Insert events. Thus {@link #baseReader} is + * a split of a file in base/ or delta/. + * + * For Compaction, each split (for now) is a logical bucket, i.e. all files from base/ + delta(s)/ + * for a given bucket ID and delete_delta(s)/ + * + * For bucketed tables, the data files are named bucket_N and all rows in this file are such + * that {@link org.apache.hadoop.hive.ql.io.BucketCodec#decodeWriterId(int)} of + * {@link RecordIdentifier#getBucketProperty()} is N. This is currently true for all types of + * files but may not be true for delete_delta/ files in the future. + * + * For un-bucketed tables, the system is designed so that it works when there is no relationship + * between data file name (bucket_N) and the value of {@link RecordIdentifier#getBucketProperty()}. + * (Later this may be optimized to take advantage of situations where it is known that + * bucket_N matches bucketProperty().) This implies that for a given {@link baseReader} all + * files in delete_delta/ have to be opened ({@link ReaderPair} created). + * + * Compactor for un-bucketed tables works exactly the same as for bucketed ones though it + * should be optimized. + * + * In both cases, Compactor can be changed so that Minor compaction is run very often and only + * compacts delete_delta/. Major compaction can do what it does now. + */ + boolean isBucketed = conf.getInt(hive_metastoreConstants.BUCKET_COUNT, 0) > 0; + acidOperationalProperties = AcidUtils.getAcidOperationalProperties(conf); TypeDescription typeDescr = OrcInputFormat.getDesiredRowTypeDescr(conf, true, Integer.MAX_VALUE); @@ -861,33 +891,35 @@ Path getBucketPath() { baseReader = pair.getRecordReader(); } - // we always want to read all of the deltas + // we always want to read all of the deltas - not for split update! couldn't/shouldn't we push down min/max key?
eventOptions.range(0, Long.MAX_VALUE); if (deltaDirectory != null) { for(Path delta: deltaDirectory) { - ReaderKey key = new ReaderKey(); - Path deltaFile = AcidUtils.createBucketFile(delta, bucket); AcidUtils.ParsedDelta deltaDir = AcidUtils.parsedDelta(delta); - FileSystem fs = deltaFile.getFileSystem(conf); - long length = OrcAcidUtils.getLastFlushLength(fs, deltaFile); - if (length != -1 && fs.exists(deltaFile)) { - Reader deltaReader = OrcFile.createReader(deltaFile, + for(Path deltaFile : getDeltaBuckets(delta, bucket, conf, mergerOptions, isBucketed)){ + ReaderKey key = new ReaderKey(); + /*todo: for un-bucketed tables we need to take all files in delete_delta*/ + FileSystem fs = deltaFile.getFileSystem(conf); + long length = OrcAcidUtils.getLastFlushLength(fs, deltaFile); + if (length != -1 && fs.exists(deltaFile)) { + Reader deltaReader = OrcFile.createReader(deltaFile, OrcFile.readerOptions(conf).maxLength(length)); - Reader.Options deltaEventOptions = null; - if(eventOptions.getSearchArgument() != null) { - // Turn off the sarg before pushing it to delta. We never want to push a sarg to a delta as - // it can produce wrong results (if the latest valid version of the record is filtered out by - // the sarg) or ArrayOutOfBounds errors (when the sarg is applied to a delete record) - // unless the delta only has insert events - AcidStats acidStats = OrcAcidUtils.parseAcidStats(deltaReader); - if(acidStats.deletes > 0 || acidStats.updates > 0) { - deltaEventOptions = eventOptions.clone().searchArgument(null, null); + Reader.Options deltaEventOptions = null; + if (eventOptions.getSearchArgument() != null) { + // Turn off the sarg before pushing it to delta. We never want to push a sarg to a delta as + // it can produce wrong results (if the latest valid version of the record is filtered out by + // the sarg) or ArrayOutOfBounds errors (when the sarg is applied to a delete record) + // unless the delta only has insert events + AcidStats acidStats = OrcAcidUtils.parseAcidStats(deltaReader); + if (acidStats.deletes > 0 || acidStats.updates > 0) { + deltaEventOptions = eventOptions.clone().searchArgument(null, null); + } + } + ReaderPairAcid deltaPair = new ReaderPairAcid(key, deltaReader, minKey, maxKey, + deltaEventOptions != null ? deltaEventOptions : eventOptions, deltaDir.getStatementId()); + if (deltaPair.nextRecord() != null) { + readers.put(key, deltaPair); } - } - ReaderPairAcid deltaPair = new ReaderPairAcid(key, deltaReader, minKey, maxKey, - deltaEventOptions != null ? deltaEventOptions : eventOptions, deltaDir.getStatementId()); - if (deltaPair.nextRecord() != null) { - readers.put(key, deltaPair); } } } @@ -910,6 +942,47 @@ Path getBucketPath() { } } + /** + * should this include copy_N files? Prasanth says we can have these even from single + * insert statement.... Actually given how the bucketFileFilter is set up, it will include all + * copy_N files. 
+ * @param deltaDirectory + * @param bucket + * @param conf + * @return + * @throws IOException + */ + private Path[] getDeltaBuckets(Path deltaDirectory, int bucket, Configuration conf, Options mergerOptions, boolean isBucketed) throws IOException { + if(isBucketed) { + Path deltaFile = AcidUtils.createBucketFile(deltaDirectory, bucket); + return new Path[]{deltaFile}; + } + if(deltaDirectory.getName().startsWith(AcidUtils.DELETE_DELTA_PREFIX)) { + //it's not wrong to take all delete events for bucketed tables but it could be more efficient + //to only take those that belong to the 'bucket' assuming we trust the file name + //un-bucketed table - get all files + FileSystem fs = deltaDirectory.getFileSystem(conf); + FileStatus[] dataFiles = fs.listStatus(deltaDirectory, AcidUtils.bucketFileFilter); + Path[] deltaFiles = new Path[dataFiles.length]; + int i = 0; + for (FileStatus stat : dataFiles) { + deltaFiles[i++] = stat.getPath(); + }//todo: need a test where we actually have more than 1 file + return deltaFiles; + } + if(acidOperationalProperties.isSplitUpdate()) { + //it must be delta_x_y - (in acid 2 - insert events only) so we must be compacting + assert mergerOptions.isCompacting() : "Expected to be called as part of compaction"; + //todo: once acid 2.0 is the only option - remove the if + } + /*for near future 2.0 compaction is unchanged, so it uses file names to partition the data into + * buckets. So every split of Compactor, takes base/bN + delta(s)/bN + delete_delta(s){all buckets} + * For regular reads, base and delta are part of "base" and only delete_deltas are "delta"*/ + Path deltaFile = AcidUtils.createBucketFile(deltaDirectory, bucket); + return new Path[] {deltaFile}; + + } + @VisibleForTesting RecordIdentifier getMinKey() { return minKey; diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java index 8f807102dd..5e7b2fcc49 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java @@ -692,6 +692,7 @@ public ColumnizedDeleteEventRegistry(JobConf conf, OrcSplit orcSplit, if (deleteDeltaDirs.length > 0) { int totalDeleteEventCount = 0; for (Path deleteDeltaDir : deleteDeltaDirs) { + //todo: this needs to read all bucket files in delete_delta for unbucketed tables Path deleteDeltaFile = AcidUtils.createBucketFile(deleteDeltaDir, bucket); FileSystem fs = deleteDeltaFile.getFileSystem(conf); // NOTE: Calling last flush length below is more for future-proofing when we have diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index 76aa39f0cb..95224d2340 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -206,6 +206,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if(!VirtualColumn.ROWID.getTypeInfo().equals(ci.getType())) { throw new IllegalStateException("expected 1st column to be ROW__ID but got wrong type: " + ci.toString()); } + //todo: not sure this is correct... I don't think it gets wrapped in UDFToInteger....
bucketColumns.add(new ExprNodeColumnDesc(ci)); } else { if (!destTable.getSortCols().isEmpty()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 4faec05bb0..470676a5e6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -7394,7 +7394,7 @@ These props are now enabled elsewhere (see commit diffs). It would be better in */ conf.set(AcidUtils.CONF_ACID_KEY, "true"); - if (table.getNumBuckets() < 1) { + if (table.getNumBuckets() < 1 && !AcidUtils.getAcidOperationalProperties(table).isSplitUpdate()) { throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, table.getTableName()); } if (table.getSortCols() != null && table.getSortCols().size() > 0) { diff --git ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java index 5e2146ea0e..85079d89fc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java +++ ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.txn.CompactionInfo; import org.apache.hadoop.hive.metastore.txn.TxnStore; import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter; @@ -93,7 +94,7 @@ static final private String IS_MAJOR = "hive.compactor.is.major"; static final private String IS_COMPRESSED = "hive.compactor.is.compressed"; static final private String TABLE_PROPS = "hive.compactor.table.props"; - static final private String NUM_BUCKETS = "hive.compactor.num.buckets"; + static final private String NUM_BUCKETS = hive_metastoreConstants.BUCKET_COUNT;//"hive.compactor.num.buckets"; static final private String BASE_DIR = "hive.compactor.base.dir"; static final private String DELTA_DIRS = "hive.compactor.delta.dirs"; static final private String DIRS_TO_SEARCH = "hive.compactor.dirs.to.search"; diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java index c50c1a8726..b06e499b85 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java @@ -67,68 +67,17 @@ * Tests here are for multi-statement transactions (WIP) and those that don't need to * run with Acid 2.0 (see subclasses of TestTxnCommands2) */ -public class TestTxnCommands { +public class TestTxnCommands extends TestTxnCommandsBase { static final private Logger LOG = LoggerFactory.getLogger(TestTxnCommands.class); private static final String TEST_DATA_DIR = new File(System.getProperty("java.io.tmpdir") + File.separator + TestTxnCommands.class.getCanonicalName() + "-" + System.currentTimeMillis() ).getPath().replaceAll("\\\\", "/"); - private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - //bucket count for test tables; set it to 1 for easier debugging - private static int BUCKET_COUNT = 2; - @Rule - public TestName testName = new TestName(); - private HiveConf hiveConf; - private Driver d; - private static enum Table { - ACIDTBL("acidTbl"), - ACIDTBLPART("acidTblPart"), - ACIDTBL2("acidTbl2"), - NONACIDORCTBL("nonAcidOrcTbl"), - 
NONACIDORCTBL2("nonAcidOrcTbl2"); - - private final String name; - @Override - public String toString() { - return name; - } - Table(String name) { - this.name = name; - } + @Override + String getTestDataDir() { + return TEST_DATA_DIR; } - @Before - public void setUp() throws Exception { - tearDown(); - hiveConf = new HiveConf(this.getClass()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); - hiveConf.setVar(HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict"); - hiveConf.setVar(HiveConf.ConfVars.HIVEINPUTFORMAT, HiveInputFormat.class.getName()); - hiveConf - .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, - "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); - hiveConf.setBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK, true); - TxnDbUtil.setConfValues(hiveConf); - TxnDbUtil.prepDb(); - File f = new File(TEST_WAREHOUSE_DIR); - if (f.exists()) { - FileUtil.fullyDelete(f); - } - if (!(new File(TEST_WAREHOUSE_DIR).mkdirs())) { - throw new RuntimeException("Could not create " + TEST_WAREHOUSE_DIR); - } - SessionState.start(new SessionState(hiveConf)); - d = new Driver(hiveConf); - d.setMaxRows(10000); - dropTables(); - runStatementOnDriver("create table " + Table.ACIDTBL + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); - runStatementOnDriver("create table " + Table.ACIDTBLPART + "(a int, b int) partitioned by (p string) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); - runStatementOnDriver("create table " + Table.NONACIDORCTBL + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='false')"); - runStatementOnDriver("create table " + Table.NONACIDORCTBL2 + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='false')"); - runStatementOnDriver("create temporary table " + Table.ACIDTBL2 + "(a int, b int, c int) clustered by (c) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); - } private void dropTables() throws Exception { for(Table t : Table.values()) { runStatementOnDriver("drop table if exists " + t); @@ -148,7 +97,7 @@ public void tearDown() throws Exception { FileUtils.deleteDirectory(new File(TEST_DATA_DIR)); } } - @Test + @Test//todo: what is this for? 
public void testInsertOverwrite() throws Exception { runStatementOnDriver("insert overwrite table " + Table.NONACIDORCTBL + " select a,b from " + Table.NONACIDORCTBL2); runStatementOnDriver("create table " + Table.NONACIDORCTBL2 + "3(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='false')"); @@ -170,7 +119,7 @@ private void dumpBucketData(Table table, long txnId, int stmtId, int bucketNum) if(true) { return; } - Path bucket = AcidUtils.createBucketFile(new Path(new Path(TEST_WAREHOUSE_DIR, table.toString().toLowerCase()), AcidUtils.deltaSubdir(txnId, txnId, stmtId)), bucketNum); + Path bucket = AcidUtils.createBucketFile(new Path(new Path(getWarehouseDir(), table.toString().toLowerCase()), AcidUtils.deltaSubdir(txnId, txnId, stmtId)), bucketNum); FileOutputStream delta = new FileOutputStream(testName.getMethodName() + "_" + bucket.getParent().getName() + "_" + bucket.getName()); // try { // FileDump.printJsonData(hiveConf, bucket.toString(), delta); @@ -488,33 +437,6 @@ private static void pause(int timeMillis) { } } - /** - * takes raw data and turns it into a string as if from Driver.getResults() - * sorts rows in dictionary order - */ - private List stringifyValues(int[][] rowsIn) { - return TestTxnCommands2.stringifyValues(rowsIn); - } - private String makeValuesClause(int[][] rows) { - return TestTxnCommands2.makeValuesClause(rows); - } - - private List runStatementOnDriver(String stmt) throws Exception { - CommandProcessorResponse cpr = d.run(stmt); - if(cpr.getResponseCode() != 0) { - throw new RuntimeException(stmt + " failed: " + cpr); - } - List rs = new ArrayList(); - d.getResults(rs); - return rs; - } - private CommandProcessorResponse runStatementOnDriverNegative(String stmt) throws Exception { - CommandProcessorResponse cpr = d.run(stmt); - if(cpr.getResponseCode() != 0) { - return cpr; - } - throw new RuntimeException("Didn't get expected failure!"); - } @Test public void exchangePartition() throws Exception { @@ -934,4 +856,4 @@ public void testMoreBucketsThanReducers2() throws Exception { int[][] expected = {{0, -1},{0, -1}, {1, -1}, {1, -1}, {2, -1}, {2, -1}, {3, -1}, {3, -1}}; Assert.assertEquals(stringifyValues(expected), r); } -} +} \ No newline at end of file diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java new file mode 100644 index 0000000000..793c4f565b --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java @@ -0,0 +1,161 @@ +package org.apache.hadoop.hive.ql; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public class TestTxnCommands3 extends TestTxnCommandsBase{ + static final private Logger LOG = LoggerFactory.getLogger(TestTxnCommands3.class); + private static final String TEST_DATA_DIR = new File(System.getProperty("java.io.tmpdir") + + File.separator + TestTxnCommands3.class.getCanonicalName() + + "-" + System.currentTimeMillis() + ).getPath().replaceAll("\\\\", "/"); + @Override + String getTestDataDir() { + return TEST_DATA_DIR; + } + @Override + @Before + public void setUp() throws Exception { + setUpInternal(); + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); + } 
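+  /** + * Un-bucketed tables are only allowed as transactional with acid 2.0, i.e. + * 'transactional_properties'='default' (see the conformToAcid() change in + * TransactionalValidationListener above); without that property the metastore should + * still reject 'transactional'='true' on an un-bucketed table, which this test verifies. + */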
+ @Test + public void testNoBucketsTableCreate() throws Exception { + CommandProcessorResponse cpr = runStatementOnDriverNegative("create table nobuckets" + + "(c1 integer, c2 integer, c3 integer) stored as orc tblproperties('transactional'='true')"); + Assert.assertTrue(cpr.getErrorMessage().contains("The table must be bucketed and stored using")); + } + + /** + * Tests that Acid can work with un-bucketed tables. + * todo: can the same test be run on Tez w/o copy-paste of the code? maybe make another test with + * more data and use https://github.com/apache/hive/blob/master/ql/src/test/queries/clientpositive/orc_merge3.q#L25 + * to record file names in .q.out file? + * @throws Exception + */ + @Test + public void testNoBuckets() throws Exception { + int[][] sourceVals1 = {{0,0,0},{3,3,3}}; + int[][] sourceVals2 = {{1,1,1},{2,2,2}}; + runStatementOnDriver("create table tmp (c1 integer, c2 integer, c3 integer) stored as orc"); + runStatementOnDriver("insert into tmp " + makeValuesClause(sourceVals1)); + runStatementOnDriver("insert into tmp " + makeValuesClause(sourceVals2)); + runStatementOnDriver("create table nobuckets (c1 integer, c2 integer, c3 integer) stored " + + "as orc tblproperties('transactional'='true', 'transactional_properties'='default')"); + String stmt = "insert into nobuckets select * from tmp"; + runStatementOnDriver(stmt); + List rs = runStatementOnDriver( + "select ROW__ID, c1, c2, c3, INPUT__FILE__NAME from nobuckets order by ROW__ID"); + Assert.assertEquals("", 4, rs.size()); + LOG.warn("after insert"); + for(String s : rs) { + LOG.warn(s); + } + /**the insert creates 2 output files (presumably because there are 2 input files) + * The number in the file name is writerId. This is the number encoded in ROW__ID.bucketId - + * see {@link org.apache.hadoop.hive.ql.io.BucketCodec}*/ + Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t0\t")); + Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nobuckets/delta_0000015_0000015_0000/bucket_00000")); + Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3\t")); + Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nobuckets/delta_0000015_0000015_0000/bucket_00000")); + Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"transactionid\":15,\"bucketid\":536936448,\"rowid\":0}\t1\t1\t1\t")); + Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nobuckets/delta_0000015_0000015_0000/bucket_00001")); + Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"transactionid\":15,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2\t")); + Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nobuckets/delta_0000015_0000015_0000/bucket_00001")); + /*RS for update seems to spray randomly... is that OK? maybe as long as all resultant files have different names... will they? + Assuming we name them based on taskId, we should create bucketX and bucketY. + we delete events can be written to bucketX file it could be useful for filter delete for a split by file name since the insert + events seem to be written to a proper bucketX file. In fact this may reduce the number of changes elsewhere like compactor... maybe + But this limits the parallelism - what is worse, you don't know what the parallelism should be until you have a list of all the + input files since bucket count is no longer a metadata property. 
Also, with late Update split, the file name has already been determined + from taskId so the Insert part won't end up matching the bucketX property necessarily. + With early Update split, the Insert can still be an insert - i.e. go to appropriate bucketX. But deletes will still go wherever (random shuffle) + unless you know all the bucketX files to be read - may not be worth the trouble. + * 2nd: something in FS fails. ArrayIndexOutOfBoundsException: 1 at FileSinkOperator.process(FileSinkOperator.java:779)*/ + runStatementOnDriver("update nobuckets set c3 = 17 where c3 in(0,1)"); + rs = runStatementOnDriver("select ROW__ID, c1, c2, c3, INPUT__FILE__NAME from nobuckets order by INPUT__FILE__NAME, ROW__ID"); + LOG.warn("after update"); + for(String s : rs) { + LOG.warn(s); + } + Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3\t")); + Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nobuckets/delta_0000015_0000015_0000/bucket_00000")); + Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"transactionid\":15,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2\t")); + Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nobuckets/delta_0000015_0000015_0000/bucket_00001")); + //so update has 1 writer which creates bucket0 where both new rows land + Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t17\t")); + Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nobuckets/delta_0000017_0000017_0000/bucket_00000")); + Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t17\t")); + Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nobuckets/delta_0000017_0000017_0000/bucket_00000")); + + Set expectedFiles = new HashSet<>(); + //both delete events land in a single bucket0. Each has a different ROW__ID.bucketId value (even writerId in it is different) + expectedFiles.add("ts/delete_delta_0000017_0000017_0000/bucket_00000"); + expectedFiles.add("nobuckets/delta_0000015_0000015_0000/bucket_00000"); + expectedFiles.add("nobuckets/delta_0000015_0000015_0000/bucket_00001"); + expectedFiles.add("nobuckets/delta_0000017_0000017_0000/bucket_00000"); + //check that we get the right files on disk + assertExpectedFileSet(expectedFiles, getWarehouseDir() + "/nobuckets"); + //todo: it would be nice to check the contents of the files... 
could use orc.FileDump - it has + // methods to print to a supplied stream but those are package private + + runStatementOnDriver("alter table nobuckets compact 'major'"); + TestTxnCommands2.runWorker(hiveConf); + rs = runStatementOnDriver("select ROW__ID, c1, c2, c3, INPUT__FILE__NAME from nobuckets order by INPUT__FILE__NAME, ROW__ID"); + LOG.warn("after major compact"); + for(String s : rs) { + LOG.warn(s); + } + /* +├── base_0000017 +│   ├── bucket_00000 +│   └── bucket_00001 +├── delete_delta_0000017_0000017_0000 +│   └── bucket_00000 +├── delta_0000015_0000015_0000 +│   ├── bucket_00000 +│   └── bucket_00001 +└── delta_0000017_0000017_0000 + └── bucket_00000 + */ + Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t3\t3\t3\t")); + Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nobuckets/base_0000017/bucket_00000")); + Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t17\t")); + Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nobuckets/base_0000017/bucket_00000")); + Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t17\t")); + Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nobuckets/base_0000017/bucket_00000")); + Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"transactionid\":15,\"bucketid\":536936448,\"rowid\":1}\t2\t2\t2\t")); + Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nobuckets/base_0000017/bucket_00001")); + + expectedFiles.clear(); + expectedFiles.add("delete_delta_0000017_0000017_0000/bucket_00000"); + expectedFiles.add("uckets/delta_0000015_0000015_0000/bucket_00000"); + expectedFiles.add("uckets/delta_0000015_0000015_0000/bucket_00001"); + expectedFiles.add("uckets/delta_0000017_0000017_0000/bucket_00000"); + expectedFiles.add("/warehouse/nobuckets/base_0000017/bucket_00000"); + expectedFiles.add("/warehouse/nobuckets/base_0000017/bucket_00001"); + assertExpectedFileSet(expectedFiles, getWarehouseDir() + "/nobuckets"); + + TestTxnCommands2.runCleaner(hiveConf); + rs = runStatementOnDriver("select c1, c2, c3 from nobuckets order by c1, c2, c3"); + int[][] result = {{0,0,17},{1,1,17},{2,2,2},{3,3,3}}; + Assert.assertEquals("Unexpected result after clean", stringifyValues(result), rs); + + expectedFiles.clear(); + expectedFiles.add("nobuckets/base_0000017/bucket_00000"); + expectedFiles.add("nobuckets/base_0000017/bucket_00001"); + assertExpectedFileSet(expectedFiles, getWarehouseDir() + "/nobuckets"); + } + //todo: try Insert with union all +} + diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsBase.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsBase.java new file mode 100644 index 0000000000..b0e1b1ab7f --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsBase.java @@ -0,0 +1,160 @@ +package org.apache.hadoop.hive.ql; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.txn.TxnDbUtil; +import org.apache.hadoop.hive.ql.io.HiveInputFormat; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import 
org.junit.Rule; +import org.junit.rules.TestName; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public abstract class TestTxnCommandsBase { + //bucket count for test tables; set it to 1 for easier debugging + final static int BUCKET_COUNT = 2; + @Rule + public TestName testName = new TestName(); + HiveConf hiveConf; + Driver d; + enum Table { + ACIDTBL("acidTbl"), + ACIDTBLPART("acidTblPart"), + ACIDTBL2("acidTbl2"), + NONACIDORCTBL("nonAcidOrcTbl"), + NONACIDORCTBL2("nonAcidOrcTbl2"); + + final String name; + @Override + public String toString() { + return name; + } + Table(String name) { + this.name = name; + } + } + + @Before + public void setUp() throws Exception { + setUpInternal(); + } + void setUpInternal() throws Exception { + tearDown(); + hiveConf = new HiveConf(this.getClass()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, getWarehouseDir()); + hiveConf.setVar(HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict"); + hiveConf.setVar(HiveConf.ConfVars.HIVEINPUTFORMAT, HiveInputFormat.class.getName()); + hiveConf + .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, + "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); + hiveConf.setBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK, true); + TxnDbUtil.setConfValues(hiveConf); + TxnDbUtil.prepDb(); + File f = new File(getWarehouseDir()); + if (f.exists()) { + FileUtil.fullyDelete(f); + } + if (!(new File(getWarehouseDir()).mkdirs())) { + throw new RuntimeException("Could not create " + getWarehouseDir()); + } + SessionState.start(new SessionState(hiveConf)); + d = new Driver(hiveConf); + d.setMaxRows(10000); + dropTables(); + runStatementOnDriver("create table " + TestTxnCommandsBase.Table.ACIDTBL + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); + runStatementOnDriver("create table " + TestTxnCommandsBase.Table.ACIDTBLPART + "(a int, b int) partitioned by (p string) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); + runStatementOnDriver("create table " + TestTxnCommandsBase.Table.NONACIDORCTBL + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='false')"); + runStatementOnDriver("create table " + TestTxnCommandsBase.Table.NONACIDORCTBL2 + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='false')"); + runStatementOnDriver("create temporary table " + TestTxnCommandsBase.Table.ACIDTBL2 + "(a int, b int, c int) clustered by (c) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); + } + private void dropTables() throws Exception { + for(TestTxnCommandsBase.Table t : TestTxnCommandsBase.Table.values()) { + runStatementOnDriver("drop table if exists " + t); + } + } + @After + public void tearDown() throws Exception { + try { + if (d != null) { + dropTables(); + d.destroy(); + d.close(); + d = null; + } + } finally { + TxnDbUtil.cleanDb(); + FileUtils.deleteDirectory(new File(getTestDataDir())); + } + } + String getWarehouseDir() { + return getTestDataDir() + "/warehouse"; + } + abstract String getTestDataDir(); + /** + * takes raw data and turns it into a string as if from 
Driver.getResults() + * sorts rows in dictionary order + */ + List stringifyValues(int[][] rowsIn) { + return TestTxnCommands2.stringifyValues(rowsIn); + } + String makeValuesClause(int[][] rows) { + return TestTxnCommands2.makeValuesClause(rows); + } + + List runStatementOnDriver(String stmt) throws Exception { + CommandProcessorResponse cpr = d.run(stmt); + if(cpr.getResponseCode() != 0) { + throw new RuntimeException(stmt + " failed: " + cpr); + } + List rs = new ArrayList(); + d.getResults(rs); + return rs; + } + CommandProcessorResponse runStatementOnDriverNegative(String stmt) throws Exception { + CommandProcessorResponse cpr = d.run(stmt); + if(cpr.getResponseCode() != 0) { + return cpr; + } + throw new RuntimeException("Didn't get expected failure!"); + } + /** + * Will assert that actual files match expected. + * @param expectedFiles - suffixes of expected Paths. Must be the same length + * @param rootPath - table or patition root where to start looking for actual files, recursively + */ + void assertExpectedFileSet(Set expectedFiles, String rootPath) throws Exception { + int suffixLength = 0; + for(String s : expectedFiles) { + if(suffixLength > 0) { + assert suffixLength == s.length() : "all entries must be the same length. current: " + s; + } + suffixLength = s.length(); + } + FileSystem fs = FileSystem.get(hiveConf); + Set actualFiles = new HashSet<>(); + RemoteIterator remoteIterator = fs.listFiles(new Path(rootPath), true); + while (remoteIterator.hasNext()) { + LocatedFileStatus lfs = remoteIterator.next(); + if(!lfs.isDirectory() && org.apache.hadoop.hive.common.FileUtils.HIDDEN_FILES_PATH_FILTER.accept(lfs.getPath())) { + String p = lfs.getPath().toString(); + actualFiles.add(p.substring(p.length() - suffixLength, p.length())); + } + } + Assert.assertEquals("Unexpected file list", expectedFiles, actualFiles); + } +} diff --git ql/src/test/queries/clientpositive/acid_no_buckets.q ql/src/test/queries/clientpositive/acid_no_buckets.q new file mode 100644 index 0000000000..c2f713e9b7 --- /dev/null +++ ql/src/test/queries/clientpositive/acid_no_buckets.q @@ -0,0 +1,210 @@ +--this has 4 groups of tests +--Acid tables w/o bucketing +--the tests with bucketing (make sure we get the same results) +--same tests with and w/o vectorization + +set hive.mapred.mode=nonstrict; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.vectorized.execution.enabled=false; +set hive.explain.user=false; +set hive.merge.cardinality.check=true; + +drop table if exists srcpart_acid; +CREATE TABLE srcpart_acid (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default'); +insert into srcpart_acid PARTITION (ds, hr) select * from srcpart; + +--2 rows for 413, 1 row for 43, 2 for 213, 1 for 44 in kv1.txt (in each partition) +select ds, hr, key, value from srcpart_acid where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer); + +analyze table srcpart_acid PARTITION(ds, hr) compute statistics; +analyze table srcpart_acid PARTITION(ds, hr) compute statistics for columns; +explain update srcpart_acid set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'; +update srcpart_acid set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'; +select ds, hr, key, value from srcpart_acid where value like '%updated' order 
by ds, hr, cast(key as integer); + +insert into srcpart_acid PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003'); +select ds, hr, key, value from srcpart_acid where cast(key as integer) > 1000 order by ds, hr, cast(key as integer); + +analyze table srcpart_acid PARTITION(ds, hr) compute statistics; +analyze table srcpart_acid PARTITION(ds, hr) compute statistics for columns; +explain delete from srcpart_acid where key in( '1001', '213', '43'); +--delete some rows from initial load, some that were updated and some that were inserted +delete from srcpart_acid where key in( '1001', '213', '43'); + +--make sure we deleted everything that should've been deleted +select count(*) from srcpart_acid where key in( '1001', '213', '43'); +--make sure nothing extra was deleted (2000 + 3 (insert) - 4 - 1 - 8 = 1990) +select count(*) from srcpart_acid; + +--todo: should really have a way to run compactor here.... + +--update should match 1 rows in 1 partition +--delete should drop everything from 1 partition +--insert should do nothing +merge into srcpart_acid t using (select distinct ds, hr, key, value from srcpart_acid) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there'); + +--check results +--should be 0 +select count(*) from srcpart_acid where ds='2008-04-08' and hr=='12'; +--should be 1 rows +select ds, hr, key, value from srcpart_acid where value like '%updated by merge'; +--should be 0 +select count(*) from srcpart_acid where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there'; +drop table if exists srcpart_acid; + + +drop table if exists srcpart_acidb; +CREATE TABLE srcpart_acidb (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) CLUSTERED BY(key) INTO 2 BUCKETS stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default'); +insert into srcpart_acidb PARTITION (ds, hr) select * from srcpart; + +--2 rows for 413, 1 row for 43, 2 for 213, 2 for 12 in kv1.txt (in each partition) +select ds, hr, key, value from srcpart_acidb where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer); + +analyze table srcpart_acidb PARTITION(ds, hr) compute statistics; +analyze table srcpart_acidb PARTITION(ds, hr) compute statistics for columns; +explain update srcpart_acidb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'; +update srcpart_acidb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'; +select ds, hr, key, value from srcpart_acidb where value like '%updated' order by ds, hr, cast(key as integer); + +insert into srcpart_acidb PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003'); +select ds, hr, key, value from srcpart_acidb where cast(key as integer) > 1000 order by ds, hr, cast(key as integer); + +analyze table srcpart_acidb PARTITION(ds, hr) compute statistics; +analyze table srcpart_acidb PARTITION(ds, hr) compute statistics for columns; +explain delete from srcpart_acidb where key in( '1001', '213', '43'); +--delete some rows from initial load, some that were updated and some that were inserted +delete from srcpart_acidb where key in( '1001', '213', '43'); + +--make sure we 
deleted everything that should've been deleted +select count(*) from srcpart_acidb where key in( '1001', '213', '43'); +--make sure nothing extra was deleted (2000 + 3 (insert) - 4 - 1 - 8 = 1990) +select count(*) from srcpart_acidb; + + +--todo: should really have a way to run compactor here.... + +--update should match 1 rows in 1 partition +--delete should drop everything from 1 partition +--insert should do nothing +merge into srcpart_acidb t using (select distinct ds, hr, key, value from srcpart_acidb) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there'); + +--check results +--should be 0 +select count(*) from srcpart_acidb where ds='2008-04-08' and hr=='12'; +--should be 1 rows +select ds, hr, key, value from srcpart_acidb where value like '%updated by merge'; +--should be 0 +select count(*) from srcpart_acidb where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there'; +drop table if exists srcpart_acidb; + + + +--now same thing but vectorized +set hive.vectorized.execution.enabled=true; + +drop table if exists srcpart_acidv; +CREATE TABLE srcpart_acidv (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default'); +insert into srcpart_acidv PARTITION (ds, hr) select * from srcpart; + +--2 rows for 413, 21 row for 43, 2 for 213, 2 for 12 in kv1.txt (in each partition) +select ds, hr, key, value from srcpart_acidv where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer); + +analyze table srcpart_acidv PARTITION(ds, hr) compute statistics; +analyze table srcpart_acidv PARTITION(ds, hr) compute statistics for columns; +explain update srcpart_acidv set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'; +update srcpart_acidv set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'; +select ds, hr, key, value from srcpart_acidv where value like '%updated' order by ds, hr, cast(key as integer); + +insert into srcpart_acidv PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003'); +select ds, hr, key, value from srcpart_acidv where cast(key as integer) > 1000 order by ds, hr, cast(key as integer); + +analyze table srcpart_acidv PARTITION(ds, hr) compute statistics; +analyze table srcpart_acidv PARTITION(ds, hr) compute statistics for columns; +explain delete from srcpart_acidv where key in( '1001', '213', '43'); +--delete some rows from initial load, some that were updated and some that were inserted +delete from srcpart_acidv where key in( '1001', '213', '43'); + +--make sure we deleted everything that should've been deleted +select count(*) from srcpart_acidv where key in( '1001', '213', '43'); +--make sure nothing extra was deleted (2000 + 3 - 4 - 1 - 8 = 1990) +select count(*) from srcpart_acidv; + +--todo: should really have a way to run compactor here.... 
+ +--update should match 1 rows in 1 partition +--delete should drop everything from 1 partition +--insert should do nothing +merge into srcpart_acidv t using (select distinct ds, hr, key, value from srcpart_acidv) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there'); + +--check results +--should be 0 +select count(*) from srcpart_acidv where ds='2008-04-08' and hr=='12'; +--should be 1 rows +select ds, hr, key, value from srcpart_acidv where value like '%updated by merge'; +--should be 0 +select count(*) from srcpart_acidv where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there'; +drop table if exists srcpart_acidv; + + + +drop table if exists srcpart_acidvb; +CREATE TABLE srcpart_acidvb (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) CLUSTERED BY(key) INTO 2 BUCKETS stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default'); +insert into srcpart_acidvb PARTITION (ds, hr) select * from srcpart; + +--2 rows for 413, 1 row for 43, 2 for 213, 2 for 12 in kv1.txt (in each partition) +select ds, hr, key, value from srcpart_acidvb where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer); + +analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics; +analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics for columns; +explain update srcpart_acidvb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'; +update srcpart_acidvb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'; +select ds, hr, key, value from srcpart_acidvb where value like '%updated' order by ds, hr, cast(key as integer); + +insert into srcpart_acidvb PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003'); +select ds, hr, key, value from srcpart_acidvb where cast(key as integer) > 1000 order by ds, hr, cast(key as integer); + +analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics; +analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics for columns; +explain delete from srcpart_acidvb where key in( '1001', '213', '43'); +--delete some rows from initial load, some that were updated and some that were inserted +delete from srcpart_acidvb where key in( '1001', '213', '43'); + +--make sure we deleted everything that should've been deleted +select count(*) from srcpart_acidvb where key in( '1001', '213', '43'); +--make sure nothing extra was deleted (2000 + 3 (insert) - 4 - 1 - 8 = 1990) +select count(*) from srcpart_acidvb; + + +--todo: should really have a way to run compactor here.... 
+ +--update should match 1 rows in 1 partition +--delete should drop everything from 1 partition +--insert should do nothing +merge into srcpart_acidvb t using (select distinct ds, hr, key, value from srcpart_acidvb) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there'); + +--check results +--should be 0 +select count(*) from srcpart_acidvb where ds='2008-04-08' and hr=='12'; +--should be 1 rows +select ds, hr, key, value from srcpart_acidvb where value like '%updated by merge'; +--should be 0 +select count(*) from srcpart_acidvb where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there'; +drop table if exists srcpart_acidvb; diff --git ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out new file mode 100644 index 0000000000..34dd4877e8 --- /dev/null +++ ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out @@ -0,0 +1,1976 @@ +PREHOOK: query: drop table if exists srcpart_acid +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists srcpart_acid +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE srcpart_acid (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_acid +POSTHOOK: query: CREATE TABLE srcpart_acid (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_acid +PREHOOK: query: insert into srcpart_acid PARTITION (ds, hr) select * from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acid +POSTHOOK: query: insert into srcpart_acid PARTITION (ds, hr) select * from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] 
+POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select ds, hr, key, value from srcpart_acid where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acid where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +2008-04-08 11 43 val_43 +2008-04-08 11 413 val_413 +2008-04-08 11 413 val_413 +2008-04-09 11 43 val_43 +2008-04-09 11 413 val_413 +2008-04-09 11 413 val_413 +PREHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Output: default@srcpart_acid +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Output: default@srcpart_acid +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +PREHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acid +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acid +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: 
default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: explain update srcpart_acid set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +PREHOOK: type: QUERY +POSTHOOK: query: explain update srcpart_acid set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_acid + Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: (UDFToInteger(key)) IN (413, 43) (type: boolean) + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: ROW__ID (type: struct), key (type: string), concat(value, 'updated') (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acid + Write Type: UPDATE + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds + hr + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acid + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: update srcpart_acid set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: query: update srcpart_acid set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 
+POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: query: select ds, hr, key, value from srcpart_acid where value like '%updated' order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acid where value like '%updated' order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 43 val_43updated +2008-04-08 11 413 val_413updated +2008-04-08 11 413 val_413updated +2008-04-09 11 43 val_43updated +2008-04-09 11 413 val_413updated +2008-04-09 11 413 val_413updated +PREHOOK: query: insert into srcpart_acid PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003') +PREHOOK: type: QUERY +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: query: insert into srcpart_acid PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: srcpart_acid PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select ds, hr, key, value from srcpart_acid where cast(key as integer) > 1000 order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acid where cast(key as integer) > 1000 order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 1001 val1001 +2008-04-08 11 1002 val1002 +2008-04-08 11 1003 val1003 +PREHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Output: default@srcpart_acid +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 
+POSTHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Output: default@srcpart_acid +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +PREHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acid +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_acid PARTITION(ds, hr) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acid +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: explain delete from srcpart_acid where key in( '1001', '213', '43') +PREHOOK: type: QUERY +POSTHOOK: query: explain delete from srcpart_acid where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_acid + Statistics: Num rows: 2015 Data size: 916825 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: (key) IN ('1001', '213', '43') (type: boolean) + Statistics: Num rows: 20 Data size: 9100 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: ROW__ID (type: struct), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: string), _col2 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: 
false + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acid + Write Type: DELETE + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds + hr + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acid + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: delete from srcpart_acid where key in( '1001', '213', '43') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: query: delete from srcpart_acid where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +PREHOOK: query: select count(*) from srcpart_acid where key in( '1001', '213', '43') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acid where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +0 +PREHOOK: query: select count(*) from srcpart_acid +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### 
+1990 +PREHOOK: query: merge into srcpart_acid t using (select distinct ds, hr, key, value from srcpart_acid) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@srcpart_acid +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: query: merge into srcpart_acid t using (select distinct ds, hr, key, value from srcpart_acid) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acid@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(srcpart_acid)t.FieldSchema(name:ROW__ID, type:struct, comment:), (srcpart_acid)t.FieldSchema(name:ds, type:string, comment:null), (srcpart_acid)t.FieldSchema(name:hr, type:string, comment:null), ] +PREHOOK: query: select count(*) from srcpart_acid where ds='2008-04-08' and hr=='12' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acid where ds='2008-04-08' and hr=='12' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +0 +PREHOOK: query: select ds, hr, key, value from srcpart_acid where value like '%updated by merge' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +PREHOOK: 
Input: default@srcpart_acid@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acid where value like '%updated by merge' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acid@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 44 val_44updated by merge +PREHOOK: query: select count(*) from srcpart_acid where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acid +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acid where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acid +#### A masked pattern was here #### +0 +PREHOOK: query: drop table if exists srcpart_acid +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart_acid +PREHOOK: Output: default@srcpart_acid +POSTHOOK: query: drop table if exists srcpart_acid +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart_acid +POSTHOOK: Output: default@srcpart_acid +PREHOOK: query: drop table if exists srcpart_acidb +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists srcpart_acidb +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE srcpart_acidb (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) CLUSTERED BY(key) INTO 2 BUCKETS stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_acidb +POSTHOOK: query: CREATE TABLE srcpart_acidb (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) CLUSTERED BY(key) INTO 2 BUCKETS stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_acidb +PREHOOK: query: insert into srcpart_acidb PARTITION (ds, hr) select * from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidb +POSTHOOK: query: insert into srcpart_acidb PARTITION (ds, hr) select * from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: srcpart_acidb PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidb PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidb 
PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidb PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidb PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidb PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidb PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidb PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select ds, hr, key, value from srcpart_acidb where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidb where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +2008-04-08 11 43 val_43 +2008-04-08 11 413 val_413 +2008-04-08 11 413 val_413 +2008-04-09 11 43 val_43 +2008-04-09 11 413 val_413 +2008-04-09 11 413 val_413 +PREHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Output: default@srcpart_acidb +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Output: default@srcpart_acidb +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +PREHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidb +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 
+POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidb +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: explain update srcpart_acidb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +PREHOOK: type: QUERY +POSTHOOK: query: explain update srcpart_acidb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_acidb + Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: (UDFToInteger(key)) IN (413, 43) (type: boolean) + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: ROW__ID (type: struct), key (type: string), concat(value, 'updated') (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidb + Write Type: UPDATE + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds + hr + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidb + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: update srcpart_acidb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: 
default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: query: update srcpart_acidb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: query: select ds, hr, key, value from srcpart_acidb where value like '%updated' order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidb where value like '%updated' order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 43 val_43updated +2008-04-08 11 413 val_413updated +2008-04-08 11 413 val_413updated +2008-04-09 11 43 val_43updated +2008-04-09 11 413 val_413updated +2008-04-09 11 413 val_413updated +PREHOOK: query: insert into srcpart_acidb PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003') +PREHOOK: type: QUERY +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: query: insert into srcpart_acidb PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: srcpart_acidb PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: srcpart_acidb PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select ds, hr, key, value from srcpart_acidb where cast(key as integer) > 1000 order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidb where cast(key as integer) > 1000 order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 1001 val1001 +2008-04-08 11 1002 val1002 
+2008-04-08 11 1003 val1003 +PREHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Output: default@srcpart_acidb +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Output: default@srcpart_acidb +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +PREHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidb +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_acidb PARTITION(ds, hr) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidb +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: explain delete from srcpart_acidb where key in( '1001', '213', '43') +PREHOOK: type: QUERY +POSTHOOK: query: explain delete from srcpart_acidb where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_acidb + Statistics: Num rows: 2015 Data size: 916825 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: (key) IN ('1001', '213', '43') (type: boolean) + Statistics: Num rows: 20 Data size: 9100 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: ROW__ID (type: struct), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + 
Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: string), _col2 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidb + Write Type: DELETE + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds + hr + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidb + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: delete from srcpart_acidb where key in( '1001', '213', '43') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: query: delete from srcpart_acidb where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +PREHOOK: query: select count(*) from srcpart_acidb where key in( '1001', '213', '43') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidb where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +0 +PREHOOK: query: select count(*) from srcpart_acidb +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: 
Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidb +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +1990 +PREHOOK: query: merge into srcpart_acidb t using (select distinct ds, hr, key, value from srcpart_acidb) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@srcpart_acidb +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: query: merge into srcpart_acidb t using (select distinct ds, hr, key, value from srcpart_acidb) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidb@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(srcpart_acidb)t.FieldSchema(name:ROW__ID, type:struct, comment:), (srcpart_acidb)t.FieldSchema(name:ds, type:string, comment:null), (srcpart_acidb)t.FieldSchema(name:hr, type:string, comment:null), ] +PREHOOK: query: select count(*) from srcpart_acidb where ds='2008-04-08' and hr=='12' +PREHOOK: type: QUERY +PREHOOK: Input: 
default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidb where ds='2008-04-08' and hr=='12' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +0 +PREHOOK: query: select ds, hr, key, value from srcpart_acidb where value like '%updated by merge' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidb where value like '%updated by merge' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 44 val_44updated by merge +PREHOOK: query: select count(*) from srcpart_acidb where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidb +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidb where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidb +#### A masked pattern was here #### +0 +PREHOOK: query: drop table if exists srcpart_acidb +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart_acidb +PREHOOK: Output: default@srcpart_acidb +POSTHOOK: query: drop table if exists srcpart_acidb +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart_acidb +POSTHOOK: Output: default@srcpart_acidb +PREHOOK: query: drop table if exists srcpart_acidv +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists srcpart_acidv +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE srcpart_acidv (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_acidv +POSTHOOK: query: CREATE TABLE srcpart_acidv (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_acidv +PREHOOK: query: insert into srcpart_acidv PARTITION (ds, hr) select * from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidv +POSTHOOK: query: insert into srcpart_acidv PARTITION (ds, hr) select * from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: 
default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select ds, hr, key, value from srcpart_acidv where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidv where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +2008-04-08 11 43 val_43 +2008-04-08 11 413 val_413 +2008-04-08 11 413 val_413 +2008-04-09 11 43 val_43 +2008-04-09 11 413 val_413 +2008-04-09 11 413 val_413 +PREHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Output: default@srcpart_acidv +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Output: default@srcpart_acidv +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +PREHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Input: 
default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidv +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidv +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: explain update srcpart_acidv set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +PREHOOK: type: QUERY +POSTHOOK: query: explain update srcpart_acidv set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_acidv + Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: (UDFToInteger(key)) IN (413, 43) (type: boolean) + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: ROW__ID (type: struct), key (type: string), concat(value, 'updated') (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidv + Write Type: UPDATE + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: 
+ ds + hr + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidv + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: update srcpart_acidv set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: query: update srcpart_acidv set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: query: select ds, hr, key, value from srcpart_acidv where value like '%updated' order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidv where value like '%updated' order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 43 val_43updated +2008-04-08 11 413 val_413updated +2008-04-08 11 413 val_413updated +2008-04-09 11 43 val_43updated +2008-04-09 11 413 val_413updated +2008-04-09 11 413 val_413updated +PREHOOK: query: insert into srcpart_acidv PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003') +PREHOOK: type: QUERY +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: query: insert into srcpart_acidv PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: srcpart_acidv PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select ds, hr, key, value from srcpart_acidv where cast(key as integer) > 1000 order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern 
was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidv where cast(key as integer) > 1000 order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 1001 val1001 +2008-04-08 11 1002 val1002 +2008-04-08 11 1003 val1003 +PREHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Output: default@srcpart_acidv +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Output: default@srcpart_acidv +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +PREHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidv +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_acidv PARTITION(ds, hr) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidv +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: explain delete from srcpart_acidv where key in( '1001', '213', '43') +PREHOOK: type: QUERY +POSTHOOK: query: explain delete from srcpart_acidv where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_acidv + Statistics: Num rows: 2015 Data size: 916825 Basic stats: COMPLETE Column stats: PARTIAL 
+ Filter Operator + predicate: (key) IN ('1001', '213', '43') (type: boolean) + Statistics: Num rows: 20 Data size: 9100 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: ROW__ID (type: struct), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: string), _col2 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidv + Write Type: DELETE + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds + hr + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidv + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: delete from srcpart_acidv where key in( '1001', '213', '43') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: query: delete from srcpart_acidv where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +PREHOOK: query: select count(*) from srcpart_acidv where key in( '1001', '213', '43') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidv where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +0 +PREHOOK: query: select count(*) from srcpart_acidv +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidv +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +1990 +PREHOOK: query: merge into srcpart_acidv t using (select distinct ds, hr, key, value from srcpart_acidv) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@srcpart_acidv +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: query: merge into srcpart_acidv t using (select distinct ds, hr, key, value from srcpart_acidv) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Output: 
default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidv@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(srcpart_acidv)t.FieldSchema(name:ROW__ID, type:struct, comment:), (srcpart_acidv)t.FieldSchema(name:ds, type:string, comment:null), (srcpart_acidv)t.FieldSchema(name:hr, type:string, comment:null), ] +PREHOOK: query: select count(*) from srcpart_acidv where ds='2008-04-08' and hr=='12' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidv where ds='2008-04-08' and hr=='12' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +0 +PREHOOK: query: select ds, hr, key, value from srcpart_acidv where value like '%updated by merge' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidv where value like '%updated by merge' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidv@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 44 val_44updated by merge +PREHOOK: query: select count(*) from srcpart_acidv where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidv +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidv where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidv +#### A masked pattern was here #### +0 +PREHOOK: query: drop table if exists srcpart_acidv +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart_acidv +PREHOOK: Output: default@srcpart_acidv +POSTHOOK: query: drop table if exists srcpart_acidv +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart_acidv +POSTHOOK: Output: default@srcpart_acidv +PREHOOK: query: drop table if exists srcpart_acidvb +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists srcpart_acidvb +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE srcpart_acidvb (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) CLUSTERED BY(key) INTO 2 BUCKETS stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_acidvb +POSTHOOK: query: CREATE TABLE srcpart_acidvb (key STRING, value STRING) PARTITIONED BY (ds STRING, hr STRING) CLUSTERED BY(key) INTO 2 BUCKETS stored as ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_acidvb +PREHOOK: query: insert into srcpart_acidvb PARTITION (ds, hr) select * from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidvb +POSTHOOK: query: insert into srcpart_acidvb PARTITION (ds, hr) select * from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select ds, hr, key, value from srcpart_acidvb where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidvb where cast(key as integer) in(413,43) and hr='11' order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +2008-04-08 11 43 val_43 +2008-04-08 11 413 val_413 +2008-04-08 11 413 val_413 +2008-04-09 11 43 val_43 +2008-04-09 11 413 val_413 +2008-04-09 11 413 val_413 +PREHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Output: default@srcpart_acidvb +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: 
Output: default@srcpart_acidvb +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +PREHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidvb +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidvb +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: explain update srcpart_acidvb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +PREHOOK: type: QUERY +POSTHOOK: query: explain update srcpart_acidvb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_acidvb + Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: (UDFToInteger(key)) IN (413, 43) (type: boolean) + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: ROW__ID (type: struct), key (type: string), concat(value, 'updated') (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string), 
VALUE._col2 (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 308500 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidvb + Write Type: UPDATE + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds + hr + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidvb + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: update srcpart_acidvb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: query: update srcpart_acidvb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: query: select ds, hr, key, value from srcpart_acidvb where value like '%updated' order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidvb where value like '%updated' order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 43 val_43updated +2008-04-08 11 413 val_413updated +2008-04-08 11 413 val_413updated +2008-04-09 11 43 val_43updated +2008-04-09 11 413 val_413updated +2008-04-09 11 413 val_413updated +PREHOOK: query: insert into srcpart_acidvb PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003') +PREHOOK: type: QUERY +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: query: insert into srcpart_acidvb PARTITION (ds='2008-04-08', hr=='11') values ('1001','val1001'),('1002','val1002'),('1003','val1003') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-08,hr=11).key SIMPLE 
[(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: srcpart_acidvb PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select ds, hr, key, value from srcpart_acidvb where cast(key as integer) > 1000 order by ds, hr, cast(key as integer) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidvb where cast(key as integer) > 1000 order by ds, hr, cast(key as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 1001 val1001 +2008-04-08 11 1002 val1002 +2008-04-08 11 1003 val1003 +PREHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Output: default@srcpart_acidvb +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Output: default@srcpart_acidvb +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +PREHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidvb +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_acidvb PARTITION(ds, hr) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidvb +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 
+POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +PREHOOK: query: explain delete from srcpart_acidvb where key in( '1001', '213', '43') +PREHOOK: type: QUERY +POSTHOOK: query: explain delete from srcpart_acidvb where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_acidvb + Statistics: Num rows: 2015 Data size: 916825 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: (key) IN ('1001', '213', '43') (type: boolean) + Statistics: Num rows: 20 Data size: 9100 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: ROW__ID (type: struct), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: string), _col2 (type: string) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 8880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidvb + Write Type: DELETE + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds + hr + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.srcpart_acidvb + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: delete from srcpart_acidvb where key in( '1001', '213', '43') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: query: delete from srcpart_acidvb where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Input: 
default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +PREHOOK: query: select count(*) from srcpart_acidvb where key in( '1001', '213', '43') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidvb where key in( '1001', '213', '43') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +0 +PREHOOK: query: select count(*) from srcpart_acidvb +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidvb +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +1990 +PREHOOK: query: merge into srcpart_acidvb t using (select distinct ds, hr, key, value from srcpart_acidvb) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there') +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@merge_tmp_table +PREHOOK: Output: default@srcpart_acidvb +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: query: merge into srcpart_acidvb t using (select distinct ds, hr, key, value from srcpart_acidvb) s +on s.ds=t.ds and s.hr=t.hr and s.key=t.key and s.value=t.value +when matched and s.ds='2008-04-08' and s.hr=='11' and 
s.key='44' then update set value=concat(s.value,'updated by merge') +when matched and s.ds='2008-04-08' and s.hr=='12' then delete +when not matched then insert values('this','should','not','be there') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@merge_tmp_table +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_acidvb@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(srcpart_acidvb)t.FieldSchema(name:ROW__ID, type:struct, comment:), (srcpart_acidvb)t.FieldSchema(name:ds, type:string, comment:null), (srcpart_acidvb)t.FieldSchema(name:hr, type:string, comment:null), ] +PREHOOK: query: select count(*) from srcpart_acidvb where ds='2008-04-08' and hr=='12' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidvb where ds='2008-04-08' and hr=='12' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +0 +PREHOOK: query: select ds, hr, key, value from srcpart_acidvb where value like '%updated by merge' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, hr, key, value from srcpart_acidvb where value like '%updated by merge' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart_acidvb@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-08 11 44 val_44updated by merge +PREHOOK: query: select count(*) from srcpart_acidvb where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_acidvb +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_acidvb where ds = 'this' and hr = 'should' and key = 'not' and value = 'be there' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_acidvb +#### A masked pattern was here #### +0 +PREHOOK: query: drop table if exists srcpart_acidvb +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart_acidvb +PREHOOK: Output: default@srcpart_acidvb +POSTHOOK: query: drop table if exists srcpart_acidvb +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart_acidvb +POSTHOOK: Output: default@srcpart_acidvb