Details
Description
When a kind contains keys of mixed types (string identifiers and integer identifiers), the v1new ReadFromDatastore transform may return duplicate items. The attached example writes 3 entities but reads back 4 records instead of the expected 3.
# Reproduction script: v1new ReadFromDatastore returning duplicate items when
# a kind holds keys with both string and integer identifiers.
from __future__ import unicode_literals

import apache_beam as beam
from apache_beam.io.gcp.datastore.v1new.types import Key, Entity, Query
from apache_beam.io.gcp.datastore.v1new import datastoreio

# Project/namespace shared by every key and by the query below.
config = dict(project='your-google-project', namespace='test')


def test_mixed():
    """Write three 'mixed' entities, read them back, and expect three lines.

    The keys deliberately mix identifier types: two string identifiers and
    one integer identifier. The entities are written with WriteToDatastore,
    then read with ReadFromDatastore using ``num_splits=4`` and dumped to
    ``tmp.txt`` as a single shard. According to the report this assertion
    fails because 4 records come back instead of 3.

    Raises:
        AssertionError: if ``tmp.txt`` does not contain exactly 3 lines.
    """
    keys = [
        Key(['mixed', '10038260-iperm_eservice'], **config),
        Key(['mixed', 4812224868188160], **config),
        Key(['mixed', '99152975-pointshop'], **config),
    ]
    # Build a concrete list so beam.Create gets the same input on Python 2
    # and Python 3 (where map() would return a lazy iterator).
    entities = [Entity(key=key) for key in keys]

    with beam.Pipeline() as p:
        (p
         | beam.Create(entities)
         | datastoreio.WriteToDatastore(project=config['project']))

    query = Query(kind='mixed', **config)
    with beam.Pipeline() as p:
        (p
         | datastoreio.ReadFromDatastore(query=query, num_splits=4)
         | beam.io.WriteToText(
             'tmp.txt', num_shards=1, shard_name_template=''))

    # Close the output file deterministically instead of leaking the handle.
    with open('tmp.txt') as f:
        items = f.read().strip().split('\n')
    assert len(items) == 3, 'incorrect number of items'
Attachments
Issue Links
- links to