# Adds a blobstore query test for bucketed table sampling. Three new files:
#   data/files/sampling_data
#       24 fixture rows of the form "<n>=1/2/3/4/,<n>".
#   itests/hive-blobstore/src/test/queries/clientpositive/sampling.q
#       Loads the fixture into a partitioned text table, copies it into a table
#       clustered by y into 8 buckets, then counts the rows returned by
#       TABLESAMPLE (BUCKET 1 OUT OF 8).
#   itests/hive-blobstore/src/test/results/clientpositive/sampling.q.out
#       Expected hook log for that script; the expected count is 4.
# The .q.out echoes the query text line for line, so any edit to a statement in
# sampling.q has to be mirrored in the PREHOOK/POSTHOOK "query:" records.
diff --git a/data/files/sampling_data b/data/files/sampling_data
new file mode 100644
index 0000000..e17b566
--- /dev/null
+++ b/data/files/sampling_data
@@ -0,0 +1,24 @@
+1=1/2/3/4/,1
+2=1/2/3/4/,2
+3=1/2/3/4/,3
+4=1/2/3/4/,4
+5=1/2/3/4/,5
+6=1/2/3/4/,6
+7=1/2/3/4/,7
+8=1/2/3/4/,8
+9=1/2/3/4/,9
+10=1/2/3/4/,10
+11=1/2/3/4/,11
+12=1/2/3/4/,12
+13=1/2/3/4/,13
+14=1/2/3/4/,14
+15=1/2/3/4/,15
+16=1/2/3/4/,16
+17=1/2/3/4/,17
+18=1/2/3/4/,18
+19=1/2/3/4/,19
+20=1/2/3/4/,20
+21=1/2/3/4/,21
+22=1/2/3/4/,22
+23=1/2/3/4/,23
+24=1/2/3/4/,24
diff --git a/itests/hive-blobstore/src/test/queries/clientpositive/sampling.q b/itests/hive-blobstore/src/test/queries/clientpositive/sampling.q
new file mode 100644
index 0000000..7262d1d
--- /dev/null
+++ b/itests/hive-blobstore/src/test/queries/clientpositive/sampling.q
@@ -0,0 +1,26 @@
+-- Test table sampling
+
+DROP TABLE sampling_intermediate;
+CREATE TABLE sampling_intermediate (x STRING, y STRING) PARTITIONED BY (d INT, a INT)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY ','
+COLLECTION ITEMS TERMINATED BY '/'
+MAP KEYS TERMINATED BY '='
+STORED AS TEXTFILE
+LOCATION '${hiveconf:test.blobstore.path.unique}/sampling/sampling_intermediate';
+LOAD DATA LOCAL INPATH '../../data/files/sampling_data' INTO TABLE sampling_intermediate partition(d=1,a=1);
+
+DROP TABLE sampling_test;
+CREATE TABLE sampling_test (x STRING, y STRING) PARTITIONED BY (d INT, a INT) CLUSTERED BY (y)
+INTO 8 BUCKETS ROW FORMAT
+DELIMITED FIELDS TERMINATED BY ','
+COLLECTION ITEMS TERMINATED BY '/'
+MAP KEYS TERMINATED BY '='
+STORED AS TEXTFILE
+LOCATION '${hiveconf:test.blobstore.path.unique}/sampling/sampling_test';
+
+INSERT OVERWRITE TABLE sampling_test PARTITION (d=1,a=1) SELECT x,y FROM sampling_intermediate;
+
+SET hive.conf.validation=false;
+SELECT COUNT(1) FROM sampling_test TABLESAMPLE (BUCKET 1 OUT OF 8);
+
diff --git a/itests/hive-blobstore/src/test/results/clientpositive/sampling.q.out b/itests/hive-blobstore/src/test/results/clientpositive/sampling.q.out
new file mode 100644
index 0000000..ec7337e
--- /dev/null
+++ b/itests/hive-blobstore/src/test/results/clientpositive/sampling.q.out
@@ -0,0 +1,84 @@
+PREHOOK: query: DROP TABLE sampling_intermediate
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE sampling_intermediate
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE sampling_intermediate (x STRING, y STRING) PARTITIONED BY (d INT, a INT)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY ','
+COLLECTION ITEMS TERMINATED BY '/'
+MAP KEYS TERMINATED BY '='
+STORED AS TEXTFILE
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Input: ### test.blobstore.path ###/sampling/sampling_intermediate
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sampling_intermediate
+POSTHOOK: query: CREATE TABLE sampling_intermediate (x STRING, y STRING) PARTITIONED BY (d INT, a INT)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY ','
+COLLECTION ITEMS TERMINATED BY '/'
+MAP KEYS TERMINATED BY '='
+STORED AS TEXTFILE
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Input: ### test.blobstore.path ###/sampling/sampling_intermediate
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sampling_intermediate
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/sampling_data' INTO TABLE sampling_intermediate partition(d=1,a=1)
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@sampling_intermediate
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/sampling_data' INTO TABLE sampling_intermediate partition(d=1,a=1)
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@sampling_intermediate
+POSTHOOK: Output: default@sampling_intermediate@d=1/a=1
+PREHOOK: query: DROP TABLE sampling_test
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE sampling_test
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE sampling_test (x STRING, y STRING) PARTITIONED BY (d INT, a INT) CLUSTERED BY (y)
+INTO 8 BUCKETS ROW FORMAT
+DELIMITED FIELDS TERMINATED BY ','
+COLLECTION ITEMS TERMINATED BY '/'
+MAP KEYS TERMINATED BY '='
+STORED AS TEXTFILE
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Input: ### test.blobstore.path ###/sampling/sampling_test
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sampling_test
+POSTHOOK: query: CREATE TABLE sampling_test (x STRING, y STRING) PARTITIONED BY (d INT, a INT) CLUSTERED BY (y)
+INTO 8 BUCKETS ROW FORMAT
+DELIMITED FIELDS TERMINATED BY ','
+COLLECTION ITEMS TERMINATED BY '/'
+MAP KEYS TERMINATED BY '='
+STORED AS TEXTFILE
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Input: ### test.blobstore.path ###/sampling/sampling_test
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sampling_test
+PREHOOK: query: INSERT OVERWRITE TABLE sampling_test PARTITION (d=1,a=1) SELECT x,y FROM sampling_intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sampling_intermediate
+PREHOOK: Input: default@sampling_intermediate@d=1/a=1
+PREHOOK: Output: default@sampling_test@d=1/a=1
+POSTHOOK: query: INSERT OVERWRITE TABLE sampling_test PARTITION (d=1,a=1) SELECT x,y FROM sampling_intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sampling_intermediate
+POSTHOOK: Input: default@sampling_intermediate@d=1/a=1
+POSTHOOK: Output: default@sampling_test@d=1/a=1
+POSTHOOK: Lineage: sampling_test PARTITION(d=1,a=1).x SIMPLE [(sampling_intermediate)sampling_intermediate.FieldSchema(name:x, type:string, comment:null), ]
+POSTHOOK: Lineage: sampling_test PARTITION(d=1,a=1).y SIMPLE [(sampling_intermediate)sampling_intermediate.FieldSchema(name:y, type:string, comment:null), ]
+PREHOOK: query: SELECT COUNT(1) FROM sampling_test TABLESAMPLE (BUCKET 1 OUT OF 8)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sampling_test
+PREHOOK: Input: default@sampling_test@d=1/a=1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(1) FROM sampling_test TABLESAMPLE (BUCKET 1 OUT OF 8)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sampling_test
+POSTHOOK: Input: default@sampling_test@d=1/a=1
+#### A masked pattern was here ####
+4