Uploaded image for project: 'ORC'
  1. ORC
  2. ORC-1304

[C++] throw ParseError when using SearchArgument with nested struct

    XML Word Printable JSON

Details

    • Bug
    • Status: Resolved
    • Major
    • Resolution: Fixed
    • 1.9.0, 1.8.3, 1.7.9
    • 1.9.0, 1.8.4, 1.7.10
    • None
    • None

    Description

      link title
      code example: 

      // Reproduction snippet for this issue: write an ORC file whose schema
      // contains two nested structs, then read it back with a SearchArgument.
      // Per the stack trace in this report, orc::ParseError ("bad read in
      // nextBuffer") is thrown from inside RowReaderImpl::next().
      WriterOptions options;
        auto stream = writeLocalFile("orc_file_test");
        // NOTE(review): pool is not referenced again in this snippet.
        MemoryPool* pool = getDefaultPool();
        // Root struct with two struct children, each wrapping a single int field.
        std::unique_ptr<Type> type(Type::buildTypeFromString(
            "struct<col0:struct<col1:int>,col2:struct<col3:int>>"));
      
        // A single batch of `num` rows is written.
        size_t num = 50000;
        std::unique_ptr<Writer> writer = createWriter(*type, stream.get(), options);
      
        // Walk the batch tree: root -> first child struct -> its int vector.
        // NOTE(review): none of the dynamic_cast results are null-checked.
        std::unique_ptr<ColumnVectorBatch> batch = writer->createRowBatch(num);
        StructVectorBatch* structBatch =
            dynamic_cast<StructVectorBatch*>(batch.get());
        StructVectorBatch* structBatch2 =
            dynamic_cast<StructVectorBatch*>(structBatch->fields[0]);
        LongVectorBatch* intBatch =
            dynamic_cast<LongVectorBatch*>(structBatch2->fields[0]);
      
        // Root -> second child struct -> its int vector.
        StructVectorBatch* structBatch3 =
            dynamic_cast<StructVectorBatch*>(structBatch->fields[1]);
        LongVectorBatch* intBatch2 =
            dynamic_cast<LongVectorBatch*>(structBatch3->fields[0]);
      
        // NOTE(review): numElements is set on the three struct batches only;
        // this snippet never sets it on intBatch or intBatch2.
        structBatch->numElements = num;
        structBatch2->numElements = num;
      
        structBatch3->numElements = num;
        structBatch3->hasNulls = true;
      
        // Fill the rows: the first int column is fully populated, while the
        // second struct and its int child are marked null on every row.
        // NOTE(review): i is int64_t but num is size_t (signed/unsigned compare).
        for (int64_t i = 0; i < num; ++i) {
          // Value in [0, 150000); no srand() call appears in this snippet.
          intBatch->data.data()[i] = rand() % 150000;
          intBatch->notNull[i] = 1;
      
          intBatch2->notNull[i] = 0;
          // Loop-invariant: assigns the same flag on every iteration.
          intBatch2->hasNulls = true;
      
          structBatch3->notNull[i] = 0;
        }
        intBatch->hasNulls = false;
      
        writer->add(*batch);
        writer->close();
      
        // Read phase: reopen the file that was just written.
        ReaderOptions readOptions;
        readOptions.setMemoryPool(*getDefaultPool());
        auto reader = createReader(readLocalFile("orc_file_test"), readOptions);
        orc::RowReaderOptions rowOptions;
        // Predicate: AND(column 2 == 5), compared as LONG.
        // NOTE(review): presumably column id 2 maps to col1 under ORC's
        // pre-order type numbering (0 = root, 1 = col0, 2 = col1) - confirm.
        rowOptions.searchArgument(
            SearchArgumentFactory::newBuilder()
                ->startAnd()
                .equals(2, PredicateDataType::LONG, Literal((int64_t)5))
                .end()
                .build());
        std::unique_ptr<RowReader> rowReader = reader->createRowReader(rowOptions);
      
        // batch is reassigned to a new object here, so the typed pointers are
        // re-derived from it.
        batch = rowReader->createRowBatch(num);
        structBatch = dynamic_cast<StructVectorBatch*>(batch.get());
        structBatch2 = dynamic_cast<StructVectorBatch*>(structBatch->fields[0]);
        intBatch = dynamic_cast<LongVectorBatch*>(structBatch2->fields[0]);
      
        structBatch3 = dynamic_cast<StructVectorBatch*>(structBatch->fields[1]);
      
        // The stack trace in this report shows the exception raised under
        // next() via startNextStripe() -> seekToRowGroup().
        while (rowReader->next(*batch)) {
          // Empty body: rows are not inspected; the loop only drives the reader.
          for (size_t i = 0; i < batch->numElements; i++) {
            
          }
        }
      

      stack trace

      terminate called after throwing an instance of 'orc::ParseError'
        what():  bad read in nextBuffer
      *** Aborted at 1666816640 (Unix time, try 'date -d @1666816640') ***
      *** Signal 6 (SIGABRT) (0x2035c0002b7ad) received by PID 178093 (pthread TID 0x7ffb12545a80) (linux TID 178093) (maybe from PID 178093, UID 131932) (code: -6), stack trace: ***
          @ 0000000000000000 (unknown)
          @ 000000000009c9d3 __GI___pthread_kill
          @ 00000000000444ec __GI_raise
          @ 000000000002c432 __GI_abort
          @ 00000000000a3fd4 __gnu_cxx::__verbose_terminate_handler()
          @ 00000000000a1b39 __cxxabiv1::__terminate(void (*)())
          @ 00000000000a1ba4 std::terminate()
          @ 00000000000a1e6f __cxa_throw
          @ 0000000001efcd55 __cxa_throw
          @ 00000000075b676c orc::BooleanRleDecoderImpl::seek(orc::PositionProvider&)
                             /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ByteRLE.cc:526
          @ 00000000075af711 orc::IntegerColumnReader::seekToRowGroup(std::unordered_map<unsigned long, orc::PositionProvider, std::hash<unsigned long>, std::equal_to<unsigned long>, std::allocator<std::pair<unsigned long const, orc::PositionProvider> > >&)
                             /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ColumnReader.cc:120
          @ 00000000075af67f orc::StructColumnReader::seekToRowGroup(std::unordered_map<unsigned long, orc::PositionProvider, std::hash<unsigned long>, std::equal_to<unsigned long>, std::allocator<std::pair<unsigned long const, orc::PositionProvider> > >&)
                             /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ColumnReader.cc:965
          @ 00000000075af67f orc::StructColumnReader::seekToRowGroup(std::unordered_map<unsigned long, orc::PositionProvider, std::hash<unsigned long>, std::equal_to<unsigned long>, std::allocator<std::pair<unsigned long const, orc::PositionProvider> > >&)
                             /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ColumnReader.cc:965
          @ 0000000007598179 orc::RowReaderImpl::seekToRowGroup(unsigned int)
                             /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/Reader.cc:440
          @ 000000000759d700 orc::RowReaderImpl::startNextStripe()
                             /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/Reader.cc:1037
          @ 000000000759daf4 orc::RowReaderImpl::next(orc::ColumnVectorBatch&)
                             /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/Reader.cc:1055
          @ 0000000002fba9bc main
          @ 000000000002c656 __libc_start_call_main
          @ 000000000002c717 __libc_start_main_alias_2
          @ 0000000002fb2780 _start
      

      Attachments

        Activity

          People

            rex_xinzh ZhangXin
            rex_xinzh ZhangXin
            Votes:
            0 Vote for this issue
            Watchers:
            1 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: