Description
A user attempted to use the incremental UPDATE STATISTICS feature on a 90 billion row table. The persistent sample table was created using a sampling rate of 5 per cent, and so it had about 4.5 billion rows. The incremental UPDATE STATISTICS command failed with an error like the following:
>>Update statistics for table traf1 on existing columns incremental where C2 >= '2017-06-10 00:00:00';
-
-
- ERROR[9219] Incremental UPDATE STATISTICS: An operation failed, possibly due to an invalid WHERE clause.
-
-
-
- ERROR[15001] A syntax error occurred at or before:
SELECT FROM TRAFODION.SEABASE.TRAF_SAMPLE_32_1498066565_999632 WHERE C2 >= '2
^ (12 characters from start of SQL statement)
- ERROR[15001] A syntax error occurred at or before:
-
-
-
- ERROR[8822] The statement was not prepared.
-
— SQL operation failed with errors.
The script below reproduces the problem on a workstation. The script creates a table, puts a few rows in it, creates a sample table, then doctors the sample table metadata to make UPDATE STATISTICS think it has 4.5 billion rows. The failure then occurs.
?section setup
drop table if exists traf1;
CREATE TABLE traf1
(
C1 CHAR(32) CHARACTER SET ISO88591 COLLATE
DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
, C2 TIMESTAMP(6) NO DEFAULT NOT NULL NOT
DROPPABLE NOT SERIALIZED
, C3 CHAR(32) CHARACTER SET ISO88591 COLLATE
DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
, C4 VARCHAR(4096 BYTES) CHARACTER SET UTF8
COLLATE DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
, C5 VARCHAR(1024 BYTES) CHARACTER SET UTF8
COLLATE DEFAULT DEFAULT NULL NOT SERIALIZED
, C6 SMALLINT DEFAULT 1 NOT SERIALIZED
, PRIMARY KEY (C1 ASC, C2 DESC, C3 ASC)
)
SALT USING 4 PARTITIONS
ON (C1)
ATTRIBUTES ALIGNED FORMAT
HBASE_OPTIONS
(
DATA_BLOCK_ENCODING = 'FAST_DIFF',
BLOCKSIZE = '262144',
MEMSTORE_FLUSH_SIZE = '1073741824'
)
;
insert into traf1 values ('abcdef',current_timestamp - interval '20' day,
'0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);
insert into traf1 values ('abcdef01',current_timestamp - interval '20' day,
'0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);
insert into traf1 values ('012345',current_timestamp - interval '20' day,
'abc012','www.blahblah.com','a bit of it is fun',2);
insert into traf1 values ('01234567',current_timestamp - interval '20' day,
'abc01234','www.blahblahblah.com','a bit of it is fun',31);
insert into traf1 values ('def01234',current_timestamp - interval '20' day,
'0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);
insert into traf1 values ('adbf01',current_timestamp - interval '20' day,
'0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);
insert into traf1 values ('a0b0c0',current_timestamp - interval '20' day,
'abc012','www.blahblah.com','a bit of it is fun',2);
insert into traf1 values ('021357',current_timestamp - interval '20' day,
'abc01234','www.blahblahblah.com','a bit of it is fun',30);
insert into traf1 values ('abcdef',current_timestamp - interval '10' day,
'0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);
insert into traf1 values ('abcddf01',current_timestamp - interval '10' day,
'0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);
insert into traf1 values ('01234345',current_timestamp - interval '10' day,
'abc012','www.blahblah.com','a bit of it is fun',2);
insert into traf1 values ('034567',current_timestamp - interval '10' day,
'abc01234','www.blahblahblah.com','a bit of it is fun',31);
insert into traf1 values ('def8201234',current_timestamp - interval '10' day,
'0123456789abcdef','www.xyz.stuff.com','a bit of stuff',18);
insert into traf1 values ('adbf9a01',current_timestamp - interval '10' day,
'0123456789abcdef01','www.C4edin.com','a bit of try twice',2);
insert into traf1 values ('a0bbc0c0',current_timestamp - interval '10' day,
'abc012','www.blahblah.com','a bit of it is fun',2);
insert into traf1 values ('02111357',current_timestamp - interval '10' day,
'abc01234','www.blahblahblah.com','a bit of it is fun',30);
insert into traf1 values ('def01234',current_timestamp,'01234789abcdef','www.xyz.stuff.com','a bit of stuff',14);
insert into traf1 values ('adbf01',current_timestamp,'0123456789abef01','www.xyz.morestuff.com','a bit of more stuff',12);
insert into traf1 values ('a0b0c0',current_timestamp,'abc03452','www.blahblah.com','a bit of it is fun',21);
insert into traf1 values ('02431357',current_timestamp,'abcd001234','www.blahblahblah.com','a bit of it is fun',39);
insert into traf1 values ('defab01234',current_timestamp,'01a0234789abcdef','www.cnn.com','a bit of stuff',14);
insert into traf1 values ('adb9ef01',current_timestamp,'012333456789abef01','www.youtube.com','a bit of more stuff',8);
insert into traf1 values ('a2b0b0c0',current_timestamp,'ab3452','www.blahblah.com','a bit of it is fun',21);
insert into traf1 values ('0243731357',current_timestamp,'abcd00122a34','www.blahblahblah.com','a bit of it is fun',39);
?section repro
UPDATE STATISTICS FOR TABLE traf1 ON EVERY COLUMN SAMPLE RANDOM 50 PERCENT PERSISTENT;
– doctor REQUESTED_SAMPLE_ROWS, ACTUAL_SAMPLE_ROWS, SAMPLING_RATIO
– from sb_persistent_samples to make UPDATE STATS think the sample table
– has 4.5 billion rows
update sb_persistent_samples set REQUESTED_SAMPLE_ROWS = 4500000000, ACTUAL_SAMPLE_ROWS = 4500000000
where table_uid = (select object_uid from "MD".objects where object_name = 'TRAF1');
cqd ustat_log 'reproULOG.txt';
update statistics log on;
Update statistics for table traf1 on existing columns incremental where C2 >= '2017-06-10 00:00:00';
exit;