Details
-
Bug
-
Status: Closed
-
Major
-
Resolution: Fixed
-
0.7.0
-
None
Description
#Fri Dec 12 11:47:55 EST 2014
git.commit.id.abbrev=d925eab
Have the following json data:
0: jdbc:drill:schema=dfs.drillTestDir> select t.soa from `complex.json` t limit 10; +------------+ | soa | +------------+ | [{"in":1},{"in":1,"fl":1.12345},{"in":1,"fl":10.12345,"nul":"not null"},{"in":1,"fl":10.6789,"nul":"not null","bool":true,"str":"here is a string at row 1"}] | | [{"in":2},{"in":2,"fl":2.12345},{"in":2,"fl":20.12345,"nul":"not null"},{"in":2,"fl":20.6789,"bool":false,"str":"here is a string at row 2"}] | | [{"in":3},{"in":3,"fl":3.12345},{"in":3,"fl":30.12345,"nul":"not null"},{"in":3,"fl":30.6789,"nul":"not null","bool":true,"str":"here is a string at row 3"}] | | [{"in":4},{"in":4,"fl":4.12345},{"in":4,"fl":40.12345,"nul":"not null"},{"in":4,"fl":40.6789,"bool":true,"str":"here is a string at row 4"}] | | [{"in":5},{"in":5,"fl":5.12345},{"in":5,"fl":50.12345,"nul":"not null"},{"in":5,"fl":50.6789,"nul":"not null","bool":true,"str":"here is a string at row 5"}] | | [{"in":6},{"in":6,"fl":6.12345},{"in":6,"fl":60.12345,"nul":"not null"},{"in":6,"fl":60.6789,"nul":"not null","bool":true,"str":"here is a string at row 6"}] | | [{"in":7},{"in":7,"fl":7.12345},{"in":7,"fl":70.12345},{"in":7,"fl":70.6789,"nul":"not null","bool":false,"str":"here is a string at row 7"}] | | [{"in":8},{"in":8,"fl":8.12345},{"in":8,"fl":80.12345},{"in":8,"fl":80.6789,"bool":true,"str":"here is a string at row 8"}] | | [{"in":9},{"in":9,"fl":9.12345},{"in":9,"fl":90.12345,"nul":"not null"},{"in":9,"fl":90.6789,"nul":"not null","bool":true,"str":"here is a string at row 9"}] | | [{"in":10},{"in":10,"fl":10.12345},{"in":10,"fl":100.12345},{"in":10,"fl":100.6789,"bool":false,"str":"here is a string at row 10"}] | +------------+
For some of the rows, for the key named 'nul', it is missing, hence returning null:
0: jdbc:drill:schema=dfs.drillTestDir> select t.soa[2].nul, t.soa[3].bool from `complex.json` t limit 10; +------------+------------+ | EXPR$0 | EXPR$1 | +------------+------------+ | not null | true | | not null | false | | not null | true | | not null | true | | not null | true | | not null | true | | null | false | | null | true | | not null | true | | null | false | +------------+------------+
But when I do a count on that, the null value still counted:
0: jdbc:drill:schema=dfs.drillTestDir> select count(t.soa[2].nul), count(t.soa[3].bool) from `complex.json` t limit 10; +------------+------------+ | EXPR$0 | EXPR$1 | +------------+------------+ | 1000000 | 1000000 | +------------+------------+
Here is the physical plan:
0: jdbc:drill:schema=dfs.drillTestDir> explain plan for select count(t.soa[2].nul), count(t.soa[3].bool) from `complex.json` t; +------------+------------+ | text | json | +------------+------------+ | 00-00 Screen 00-01 Project(EXPR$0=[$0], EXPR$1=[$0]) 00-02 StreamAgg(group=[{}], EXPR$0=[$SUM0($0)]) 00-03 StreamAgg(group=[{}], EXPR$0=[COUNT()]) 00-04 Project($f0=[ITEM(ITEM($0, 2), 'nul')], $f1=[ITEM(ITEM($0, 3), 'bool')]) 00-05 Scan(groupscan=[EasyGroupScan [selectionRoot=/drill/testdata/complex_type/json/complex.json, numFiles=1, columns=[`soa`[2].`nul`, `soa`[3].`bool`], files=[maprfs:/drill/testdata/complex_type/json/complex.json]]]) | { "head" : { "version" : 1, "generator" : { "type" : "ExplainHandler", "info" : "" }, "type" : "APACHE_DRILL_PHYSICAL", "options" : [ ], "queue" : 0, "resultMode" : "EXEC" }, "graph" : [ { "pop" : "fs-scan", "@id" : 5, "files" : [ "maprfs:/drill/testdata/complex_type/json/complex.json" ], "storage" : { "type" : "file", "enabled" : true, "connection" : "maprfs:///", "workspaces" : { "root" : { "location" : "/", "writable" : false, "defaultInputFormat" : null }, "tmp" : { "location" : "/tmp", "writable" : true, "defaultInputFormat" : "csv" }, "drillTestDir" : { "location" : "/drill/testdata/", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirComplexJson" : { "location" : "/drill/testdata/complex_type/json", "writable" : true, "defaultInputFormat" : "json" }, "drillTestDirAmplab" : { "location" : "/drill/testdata/amplab", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirInformationSchema" : { "location" : "/drill/testdata/information-schema", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirUdfs" : { "location" : "/drill/testdata/udfs/", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirP1" : { "location" : "/drill/testdata/p1tests", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirTpch10Parquet" : { "location" : "/drill/testdata/tpch10", "writable" : true, "defaultInputFormat" : "parquet" }, "Join" : { "location" : "/drill/testdata/join", "writable" : true, "defaultInputFormat" : "parquet" }, "NoExtJson" : { "location" : "/drill/testdata/no-extension/json", "writable" : true, "defaultInputFormat" : "json" }, "NoExtParquet" : { "location" : "/drill/testdata/no-extension/parquet", "writable" : true, "defaultInputFormat" : "parquet" }, "NoExtParquetNull" : { "location" : "/drill/testdata/no-extension/parquet", "writable" : true, "defaultInputFormat" : null }, "NoExtText" : { "location" : "/drill/testdata/no-extension/text", "writable" : true, "defaultInputFormat" : "psv" }, "drillTestDirExchanges" : { "location" : "/drill/testdata/exchanges_test", "writable" : true, "defaultInputFormat" : "parquet" }, "TpcHMulti" : { "location" : "/drill/testdata/tpch-multi", "writable" : true, "defaultInputFormat" : "parquet" }, "TpcHMulti100" : { "location" : "/drill/testdata/SF100", "writable" : true, "defaultInputFormat" : "parquet" }, "TpcHMulti1" : { "location" : "/drill/testdata/tpch_SF1", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirExplicit" : { "location" : "/drill/testdata/explicit_cast", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirImplicit" : { "location" : "/drill/testdata/implicit_cast", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirImplicit1" : { "location" : "/drill/testdata/implicit_cast", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirTPCDS" : { "location" : "/user/root/tpcds/parquet", "writable" : true, "defaultInputFormat" : "parquet" }, "TPCDS" : { "location" : "/drill/testdata/tpcds", "writable" : true, "defaultInputFormat" : "parquet" }, "drillMondrian" : { "location" : "/user/root/mondrian", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirDatetime" : { "location" : "/drill/testdata/datetime/datasources", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirViews" : { "location" : "/drill/testdata/views/", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirNumerical" : { "location" : "/drill/testdata/numerical/", "writable" : true, "defaultInputFormat" : "parquet" }, "drillTestDirJson" : { "location" : "/drill/testdata/json_storage/", "writable" : true, "defaultInputFormat" : null }, "drillTestDirTestNewWS" : { "location" : "/drill/testdata/newWS/", "writable" : true, "defaultInputFormat" : null }, "drillTestDirTpch01Text" : { "location" : "/drill/testdata/Tpch0.01/text/", "writable" : true, "defaultInputFormat" : null }, "drillTestDirTpch01Json" : { "location" : "/drill/testdata/Tpch0.01/json/", "writable" : true, "defaultInputFormat" : null }, "drillTestDirTpch01Parquet" : { "location" : "/drill/testdata/Tpch0.01/parquet/", "writable" : true, "defaultInputFormat" : null }, "drillTestDirConvert" : { "location" : "/drill/testdata/convert", "writable" : true, "defaultInputFormat" : null }, "drillTestDirTpch100Text" : { "location" : "/drill/testdata/tpch100/text/", "writable" : true, "defaultInputFormat" : null }, "drillTestDirTpch100Parquet" : { "location" : "/drill/testdata/tpch100/parquet", "writable" : true, "defaultInputFormat" : null }, "drillTestDirAggregate1parquet" : { "location" : "/drill/testdata/tpcds/parquet/s1", "writable" : true, "defaultInputFormat" : null }, "drillTestDirAggregate1csv" : { "location" : "/drill/testdata/tpcds/csv/s1", "writable" : true, "defaultInputFormat" : null }, "drillTestDirAggregate1json" : { "location" : "/drill/testdata/tpcds/json/s1", "writable" : true, "defaultInputFormat" : null }, "drillTestDirMondrian" : { "location" : "/drill/testdata/mondrian", "writable" : true, "defaultInputFormat" : null }, "drillTestDirTpcdsImpalaSF1" : { "location" : "/drill/testdata/tpcds-impala-sf1", "writable" : true, "defaultInputFormat" : null }, "sandbox" : { "location" : "/sandbox", "writable" : true, "defaultInputFormat" : "parquet" }, "sandbox-logs" : { "location" : "/sandbox/flat", "writable" : true, "defaultInputFormat" : "parquet" }, "sandbox-json" : { "location" : "/sandbox/json", "writable" : true, "defaultInputFormat" : "parquet" } }, "formats" : { "psv" : { "type" : "text", "extensions" : [ "tbl" ], "delimiter" : "|" }, "dsv" : { "type" : "text", "extensions" : [ "dat" ], "delimiter" : "|" }, "csv" : { "type" : "text", "extensions" : [ "csv" ], "delimiter" : "," }, "tsv" : { "type" : "text", "extensions" : [ "tsv" ], "delimiter" : "\t" }, "parquet" : { "type" : "parquet" }, "json" : { "type" : "json" } } }, "format" : { "type" : "json" }, "columns" : [ "`soa`[2].`nul`", "`soa`[3].`bool`" ], "selectionRoot" : "/drill/testdata/complex_type/json/complex.json", "cost" : 1186767.0 }, { "pop" : "project", "@id" : 4, "exprs" : [ { "ref" : "`$f0`", "expr" : "`soa`[2].`nul`" }, { "ref" : "`$f1`", "expr" : "`soa`[3].`bool`" } ], "child" : 5, "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 1186767.0 }, { "pop" : "streaming-aggregate", "@id" : 3, "child" : 4, "keys" : [ ], "exprs" : [ { "ref" : "`EXPR$0`", "expr" : "count(1) " } ], "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 1.0 }, { "pop" : "streaming-aggregate", "@id" : 2, "child" : 3, "keys" : [ ], "exprs" : [ { "ref" : "`EXPR$0`", "expr" : "$sum0(`EXPR$0`) " } ], "initialAllocation" : 1000000, "maxAllocation" : 10000000000, "cost" : 1.0 }, { "pop" : "project", "@id" : 1, "exprs" : [ { "ref" : "`EXPR$0`", "expr" : "`EXPR$0`" }, { "ref" : "`EXPR$1`", "expr" : "`EXPR$0`" } ], "child" : 2, "initialAllocation" : 1000000, "maxAllocation" : 1000000000 | +------------+------------+