Uploaded image for project: 'UIMA'
  1. UIMA
  2. UIMA-4049

The curious case of the zombie annotation

    XML | Word | Printable | JSON

Details

    • Bug
    • Status: Closed
    • Major
    • Resolution: Done
    • None
    • 2.7.0SDK
    • Core Java Framework
    • None

    Description

      When annotations are removed from indexes, they sometimes come back. The following test case shows how an annotation is removed but is still present when iterating over the index later.

          /**
           * Runs the zombie check on two German sentences with pre-computed token offsets.
           * According to the report, the first sentence passes and the second one exposes
           * a removed token that is still returned by the index.
           */
          @Test
          public void testForZombies() throws Exception
          {
              // Control sentence: reported as not producing a zombie
              String cleanText = "Dies flößte Friedrich II. für seine neue Eroberung Besorgnis ein.";
              int[] cleanOffsets = new int[] {
                      0, 4, 5, 11, 12, 21, 22, 25, 26, 29, 30, 35, 36, 40, 41, 50, 51, 60, 61, 64,
                      64, 65 };
              testForZombies(cleanText, cleanOffsets);

              // Failing sentence: reported as producing a zombie
              String zombieText = "Ich bin Franz III. von Hammerfels !";
              int[] zombieOffsets = new int[] { 0, 3, 4, 7, 8, 13, 14, 18, 19, 22, 23, 33, 34, 35 };
              testForZombies(zombieText, zombieOffsets);
          }
      
          /**
           * Builds a CAS over {@code aText}, indexes one "Token" annotation per offset pair,
           * merges every name token with a directly following suffix token, removes the name
           * token from the indexes, and fails if a token with the removed text is still
           * returned when iterating over the annotation index afterwards.
           *
           * @param aText
           *            the document text to set on the CAS.
           * @param aOffsets
           *            flat array of token boundaries, read pairwise as begin (even position)
           *            and end (odd position).
           * @throws Exception
           *             if creating the CAS or any later step fails.
           */
          public void testForZombies(String aText, int[] aOffsets) throws Exception
          {
              // Init some dictionaries we use
              Set<String> names = new HashSet<String>();
              names.add("Friedrich");
              names.add("Franz");
      
              Set<String> suffix = new HashSet<String>();
              suffix.add("II.");
              suffix.add("III.");
      
              // Set up type system: a single "Token" type derived from the built-in annotation type
              TypeSystemDescription tsd = new TypeSystemDescription_impl();
              tsd.addType("Token", "", CAS.TYPE_NAME_ANNOTATION);
              
              // Create CAS (note: despite its name, "jcas" is a plain CAS, not a JCas)
              CAS jcas = CasCreationUtils.createCas(tsd, null, null);
              jcas.setDocumentText(aText);
              
              Type tokenType = jcas.getTypeSystem().getType("Token");
              Feature beginFeature = tokenType.getFeatureByBaseName("begin");
              
              // Create tokens in CAS, one per (begin, end) pair, and add each to the indexes
              for (int i = 0; i < aOffsets.length; i += 2) {
                  jcas.addFsToIndexes(jcas.createAnnotation(tokenType, aOffsets[i], aOffsets[i+1]));
              }
              
              // List the tokens in the CAS
              for (AnnotationFS token : jcas.getAnnotationIndex(tokenType)) {
                  System.out.printf("Starting with %s%n", token.getCoveredText());
              }
      
              // Merge some tokens, in particular "Franz" "III." -> "Franz III." and "Friedrich" "II."
              // into "Friedrich II."
              AnnotationFS previous = null;
              List<AnnotationFS> toDelete = new ArrayList<>();
              for (AnnotationFS token : jcas.getAnnotationIndex(tokenType)) {
                  if (previous != null && names.contains(previous.getCoveredText())
                          && suffix.contains(token.getCoveredText())) {
                      // NOTE(review): this changes "begin" on a token that is still in the
                      // indexes (it is not removed and re-added around the change), and it does
                      // so while iterating over that index. Presumably "begin" is a sort key of
                      // the annotation index, which would leave the index inconsistent - confirm
                      // whether this is what triggers the zombie.
                      token.setIntValue(beginFeature, previous.getBegin());
                      // Removal is deferred until after the iteration has finished
                      toDelete.add(previous);
                  }
                  previous = token;
              }
      
              // Remove the no longer necessary tokens ("Friedrich" and "Franz"), since we expanded the
              // following tokens "III." and "II." to include their text
              Set<String> removedWords = new HashSet<String>();
              for (AnnotationFS token : toDelete) {
                  System.out.printf("Removing %s%n", token.getCoveredText());
                  removedWords.add(token.getCoveredText());
                  jcas.removeFsFromIndexes(token);
              }
      
              // Check if the tokens that we wanted to remove are really gone; the comparison is
              // by covered text, so a merged token such as "Franz III." does not count as a match
              for (AnnotationFS token : jcas.getAnnotationIndex(tokenType)) {
                  System.out.printf("Remaining %s%n", token.getCoveredText());
                  if (removedWords.contains(token.getCoveredText())) {
                     org.junit.Assert.fail("I saw a zombie!!!");
                  }
              }
          }
      

      Attachments

        1. CuriousTestCase.java
          5 kB
          Richard Eckart de Castilho

        Issue Links

          Activity

            People

              schor Marshall Schor
              rec Richard Eckart de Castilho
              Votes:
              0 Vote for this issue
              Watchers:
              5 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: