Uploaded image for project: 'UIMA'
  1. UIMA
  2. UIMA-3141

Binary CAS format 6 + type filtering fails to deserialize document annotation correctly

    Export: XML | Word | Printable | JSON

Details

    • Type: Bug
    • Status: Open
    • Priority: Major
    • Resolution: Unresolved
    • Affects Version/s: 2.4.1SDK
    • Fix Version/s: None
    • Component/s: Core Java Framework
    • Labels: None

    Description

      When a custom document annotation type is used, the language is not properly restored after deserializing from CAS format 6.

      Expected: deserialized CAS has language "latin"

      Actual: deserialized CAS has language "x-unspecified"

      If the line sourceCas.addFsToIndexes(ma); is commented out, the code works.

      import static org.junit.Assert.assertEquals;
      import static org.junit.Assert.assertTrue;
      
      import java.io.File;
      import java.io.FileInputStream;
      import java.io.FileOutputStream;
      import java.io.InputStream;
      import java.io.OutputStream;
      
      import org.apache.commons.io.IOUtils;
      import org.apache.uima.cas.CAS;
      import org.apache.uima.cas.impl.Serialization;
      import org.apache.uima.cas.text.AnnotationFS;
      import org.apache.uima.resource.metadata.TypeSystemDescription;
      import org.apache.uima.resource.metadata.impl.TypeSystemDescription_impl;
      import org.apache.uima.util.CasCreationUtils;
      import org.junit.Rule;
      import org.junit.Test;
      import org.junit.rules.TemporaryFolder;
      
      /**
       * Minimal reproduction for UIMA-3141: a CAS whose type system declares a custom
       * subtype of the document annotation is written with
       * {@code Serialization.serializeWithCompression} and read back into a CAS created
       * from an empty type system description.
       *
       * <p>Per the issue report, the language assertion is the one that fails: the target
       * CAS reports {@code "x-unspecified"} instead of {@code "latin"}. The report also
       * notes that the test passes when the {@code addFsToIndexes(ma)} call is removed.
       */
      public class MinimalTest
      {
          /** Supplies the scratch directory that holds the serialized CAS file. */
          @Rule
          public TemporaryFolder testFolder = new TemporaryFolder();

          /**
           * Serializes the source CAS to {@code test.bin}, deserializes it into a second
           * CAS, and checks that document language and document text survived.
           *
           * @throws Exception if CAS creation, file I/O or (de)serialization fails
           */
          @Test
          public void test()
              throws Exception
          {
              // Source side declares "DocMeta" as a subtype of the built-in document
              // annotation; the target side is created from an empty description.
              TypeSystemDescription sourceTsd = new TypeSystemDescription_impl();
              sourceTsd.addType("DocMeta", "", CAS.TYPE_NAME_DOCUMENT_ANNOTATION);
              TypeSystemDescription targetTsd = new TypeSystemDescription_impl();

              CAS sourceCas = CasCreationUtils.createCas(sourceTsd, null, null);
              AnnotationFS ma = sourceCas.createAnnotation(sourceCas.getTypeSystem().getType("DocMeta"),
                      0, 0);
              // Indexing the custom annotation is the trigger named in the issue report.
              sourceCas.addFsToIndexes(ma);
              sourceCas.setDocumentLanguage("latin");
              sourceCas.setDocumentText("test");

              File file = testFolder.newFile("test.bin");

              OutputStream os = new FileOutputStream(file);
              try {
                  Serialization.serializeWithCompression(sourceCas, os, sourceCas.getTypeSystem());
                  // Close explicitly on the success path so a failed flush/close is reported
                  // instead of leaving a truncated file for the read-back below.
                  os.close();
              }
              finally {
                  // Releases the handle if serialization threw; harmless after a clean close.
                  IOUtils.closeQuietly(os);
              }

              assertTrue(new File(testFolder.getRoot(), "test.bin").exists());

              CAS targetCas = CasCreationUtils.createCas(targetTsd, null, null);
              InputStream is = new FileInputStream(file);
              try {
                  // The source type system is passed so the reader can map the stored
                  // types onto the target CAS.
                  Serialization.deserializeCAS(targetCas, is, sourceCas.getTypeSystem(), null);
              }
              finally {
                  IOUtils.closeQuietly(is);
              }

              assertEquals("latin", targetCas.getDocumentLanguage());
              assertEquals("test", targetCas.getDocumentText());
          }
      }
      

      Attachments

        Activity

          People

            Assignee: Marshall Schor (schor)
            Reporter: Richard Eckart de Castilho (rec)
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

            Dates

              Created:
              Updated: