Uploaded image for project: 'Apache Arrow'
  1. Apache Arrow
  2. ARROW-14417

[R] Joins ignore projection on left dataset

    Export: XML | Word | Printable | JSON

Details

    Description

      left <- Table$create(
        one = c("a", "b"), 
        two = 1:2
      )
      right <- Table$create(
        three = TRUE, 
        dos = 2L
      )
      
      left %>% 
        rename(dos = two)
      
      # InMemoryDataset (query)
      # one: string
      # dos: int32
      
      # See $.data for the source Arrow object
      
      left %>% 
        rename(dos = two) %>% 
        left_join(right)
      
      # InMemoryDataset (query)
      # one: string
      # dos: int32
      # three: bool
      
      # See $.data for the source Arrow object
      
      left %>% 
        rename(dos = two) %>% 
        left_join(right) %>% 
        collect()
      
      # Error: Invalid: No match or multiple matches for key field reference FieldRef.Name(dos) on left side of the join
      # ../src/arrow/compute/exec/hash_join_node.cc:95  ValidateSchemas(join_type, left_schema, left_keys, left_output, right_schema, right_keys, right_output, left_field_name_prefix, right_field_name_prefix)
      # ../src/arrow/compute/exec/hash_join_node.cc:311  schema_mgr->Init( join_options.join_type, *(inputs[0]->output_schema()), join_options.left_keys, join_options.left_output, *(inputs[1]->output_schema()), join_options.right_keys, join_options.right_output, join_options.output_prefix_for_left, join_options.output_prefix_for_right)
      

      Attachments

        Issue Links

          Activity

            People

              Assignee: Neal Richardson (npr)
              Reporter: Neal Richardson (npr)
              Votes:
              0 Vote for this issue
              Watchers:
              2 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Time Tracking

                  Estimated:
                  Original Estimate - Not Specified
                  Not Specified
                  Remaining:
                  Remaining Estimate - 0h
                  0h
                  Logged:
                  Time Spent - 2h
                  2h