Details
-
Bug
-
Status: Closed
-
Major
-
Resolution: Fixed
-
None
-
None
-
None
Description
The query is a bit long:
use dataverse twitter let $common := ( for $t in dataset twitter.ds_tweet_help where similarity-jaccard(word-tokens($t."text"), word-tokens("help")) > 0.0 where ($t."create_at">= datetime("2012-02-01T00:00:00.000Z") and $t."create_at" < datetime("2016-05-23T14:00:57.000Z")) let $set := [ 37,51,24,11,10,34,42,9,44,48,35,4,40,6,20,32,8,49,12,22,28,1,13,45,5,47,21,29,54,17,18,39,19,55,26,27,31,56,41,46,16,30,53,38,25,36,50,33,23,2 ] for $sid in $set where $t.geo_tag.stateID = $sid return $t ) let $hashtag := ( for $t in $common where not(is-null($t.hashtags)) for $h in $t.hashtags group by $tag := $h with $h let $c := count($h) order by $c desc limit 50 return { "key": $tag, "count" : $c} ) return $hashtag
It throws the following error:
SEVERE: Could not resolve type for function-call: asterix:scan-collection, Args:[function-call: asterix:field-access-by-name, Args:[%0->$$0, AString: {hashtags}]],please check whether the used variables has been defined! org.apache.hyracks.algebricks.common.exceptions.AlgebricksException: Could not resolve type for function-call: asterix:scan-collection, Args:[function-call: asterix:field-access-by-name, Args:[%0->$$0, AString: {hashtags}]],please check whether the used variables has been defined! at org.apache.hyracks.algebricks.core.algebra.typing.AbstractTypeEnvironment.getType(AbstractTypeEnvironment.java:48) at org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator.computeOutputTypeEnvironment(UnnestOperator.java:51) at org.apache.hyracks.algebricks.core.rewriter.base.AlgebricksOptimizationContext.computeAndSetTypeEnvironmentForOperator(AlgebricksOptimizationContext.java:295) at org.apache.hyracks.algebricks.core.algebra.util.OperatorPropertiesUtil.typeOpRec(OperatorPropertiesUtil.java:256) at org.apache.hyracks.algebricks.core.algebra.util.OperatorPropertiesUtil.typeOpRec(OperatorPropertiesUtil.java:249) at org.apache.hyracks.algebricks.core.algebra.util.OperatorPropertiesUtil.typeOpRec(OperatorPropertiesUtil.java:249) ..... Caused by: java.lang.NullPointerException at org.apache.asterix.om.util.NonTaggedFormatUtil.isOptional(NonTaggedFormatUtil.java:105) at org.apache.asterix.om.typecomputer.impl.NonTaggedCollectionMemberResultType.computeType(NonTaggedCollectionMemberResultType.java:46) at org.apache.asterix.dataflow.data.common.AqlExpressionTypeComputer.getTypeForFunction(AqlExpressionTypeComputer.java:86) at org.apache.asterix.dataflow.data.common.AqlExpressionTypeComputer.getType(AqlExpressionTypeComputer.java:57) at org.apache.hyracks.algebricks.core.algebra.typing.AbstractTypeEnvironment.getType(AbstractTypeEnvironment.java:46)
The DDL is as follows:
create dataverse twitter if not exists; use dataverse twitter create type typeUser if not exists as open { id: int64, name: string, screen_name : string, lang : string, location: string, create_at: date, description: string, followers_count: int32, friends_count: int32, statues_count: int64 } create type typePlace if not exists as open{ country : string, country_code : string, full_name : string, id : string, name : string, place_type : string, bounding_box : rectangle } create type typeGeoTag if not exists as open { stateID: int32, stateName: string, countyID: int32, countyName: string, cityID: int32?, cityName: string? } create type typeTweet if not exists as open{ create_at : datetime, id: int64, "text": string, in_reply_to_status : int64, in_reply_to_user : int64, favorite_count : int64, coordinate: point?, retweet_count : int64, lang : string, is_retweet: boolean, hashtags : {{ string }} ?, user_mentions : {{ int64 }} ? , user : typeUser, place : typePlace?, geo_tag: typeGeoTag } create dataset ds_tweet(typeTweet) if not exists primary key id; //with filter on create_at; //"using" "compaction" "policy" CompactionPolicy ( Configuration )? )? create index text_idx if not exists on ds_tweet("text") type keyword; create index location_idx if not exists on ds_tweet(coordinate) type rtree; create index time_idx if not exists on ds_tweet(create_at) type btree; create index state_idx if not exists on ds_tweet(geo_tag.stateID) type btree; create index county_idx if not exists on ds_tweet(geo_tag.countyID) type btree; create index city_idx if not exists on ds_tweet(geo_tag.cityID) type btree;
Interestingly, if I swap the order of the two consecutive where clauses in the $common part (the similarity-jaccard predicate and the create_at range predicate), the query runs without this error.