Details
-
Type: Improvement
-
Status: Open
-
Priority: Major
-
Resolution: Unresolved
-
None
-
None
-
None
-
None
Description
When using Sqoop import with --incremental lastmodified and --check-column, rows whose check column contains NULL are silently ignored by the incremental import.
Please consider adding the ability to include rows whose check column value is NULL.
################# # STEP 01 - Create RDBMS Table and Data ################# export MYCONN=jdbc:oracle:thin:@myoracle.mydomain.com:1521/db11g export MYUSER=sqoop export MYPSWD=sqoop sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "drop table t1" sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "create table t1 (c1 int, c2 date, c3 varchar(15), c4 timestamp)" sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "insert into t1 values (1, sysdate, 'data row 1', sysdate)" sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "select * from t1 order by c1" ------------------------------------------------------------------ | C1 | C2 | C3 | C4 | ------------------------------------------------------------------ | 1 | 2017-05-04 15:17:33.0 | data row 1 | 2017-05-04 15:17:33 | ------------------------------------------------------------------ ################# # STEP 02 - Create Sqoop Incremental Import Job ################# sqoop job --create inc_import_hdfs -- import --connect $MYCONN --username $MYUSER --password $MYPSWD --table T1 --target-dir /user/root/t1 --incremental lastmodified --check-column C4 --merge-key C1 --last-value '2017-01-01 00:00:00' --as-textfile --map-column-java C2=String,C4=String --fields-terminated-by '\001' --lines-terminated-by '\n' --num-mappers 1 --verbose --hive-drop-import-delims ################# # STEP 03 - Execute Job and Verify data in HDFS ################# hdfs dfs -rm -r /user/root/t1 sqoop job --exec inc_import_hdfs -- --connect $MYCONN --username $MYUSER --password $MYPSWD hdfs dfs -cat /user/root/t1/part* Output: 17/05/04 15:25:20 INFO mapreduce.ImportJobBase: Transferred 55 bytes in 24.6352 seconds (2.2326 bytes/sec) 17/05/04 15:25:20 INFO mapreduce.ImportJobBase: Retrieved 1 records. 
~~~~~ 12017-05-04 15:17:33.0data row 12017-05-04 15:17:33 ################# # STEP 04 - Insert New Rows (one with NULL --check-column) ################# sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "insert into t1 values (2, sysdate, 'data row 2', NULL)" sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "insert into t1 values (3, sysdate, 'data row 3', sysdate)" sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "select * from t1 order by c1" Output: ------------------------------------------------------------------ | C1 | C2 | C3 | C4 | ------------------------------------------------------------------ | 1 | 2017-05-04 15:17:33.0 | data row 1 | 2017-05-04 15:17:33 | | 2 | 2017-05-04 15:27:19.0 | data row 2 | (null) | | 3 | 2017-05-04 15:27:22.0 | data row 3 | 2017-05-04 15:27:22 | ------------------------------------------------------------------ ################# # STEP 05 - Execute Job and Verify data in HDFS (row with NULL --check-column is not imported) ################# sqoop job --exec inc_import_hdfs -- --connect $MYCONN --username $MYUSER --password $MYPSWD hdfs dfs -cat /user/root/t1/part* Output: 17/05/04 15:28:22 INFO mapreduce.ImportJobBase: Transferred 55 bytes in 21.6227 seconds (2.5436 bytes/sec) 17/05/04 15:28:22 INFO mapreduce.ImportJobBase: Retrieved 1 records. 
~~~~~ 12017-05-04 15:17:33.0data row 12017-05-04 15:17:33 32017-05-04 15:27:22.0data row 32017-05-04 15:27:22 ################# # STEP 06 - Update Row with NULL --check-column, Execute Job and Verify data in HDFS ################# sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "update t1 set c4 = sysdate where c1 = 2" sqoop job --exec inc_import_hdfs -- --connect $MYCONN --username $MYUSER --password $MYPSWD hdfs dfs -cat /user/root/t1/part* Output: 17/05/04 15:32:47 INFO mapreduce.ImportJobBase: Transferred 55 bytes in 27.2132 seconds (2.0211 bytes/sec) 17/05/04 15:32:47 INFO mapreduce.ImportJobBase: Retrieved 1 records. ~~~~~ 12017-05-04 15:17:33.0data row 12017-05-04 15:17:33 22017-05-04 15:27:19.0data row 22017-05-04 15:31:55 <=== 32017-05-04 15:27:22.0data row 32017-05-04 15:27:22