Details
-
Bug
-
Status: Closed
-
Major
-
Resolution: Done
-
None
-
None
Description
When annotations are removed from indexes, they sometimes come back. The following test case shows how an annotation is removed but is still present when iterating over the index later.
@Test public void testForZombies() throws Exception { // No zombie here int[] offsets1 = { 0, 4, 5, 11, 12, 21, 22, 25, 26, 29, 30, 35, 36, 40, 41, 50, 51, 60, 61, 64, 64, 65 }; testForZombies("Dies flößte Friedrich II. für seine neue Eroberung Besorgnis ein.", offsets1); // Zombie hiding in here int[] offsets2 = { 0, 3, 4, 7, 8, 13, 14, 18, 19, 22, 23, 33, 34, 35 }; testForZombies("Ich bin Franz III. von Hammerfels !", offsets2); } public void testForZombies(String aText, int[] aOffsets) throws Exception { // Init some dictionaries we ues Set<String> names = new HashSet<String>(); names.add("Friedrich"); names.add("Franz"); Set<String> suffix = new HashSet<String>(); suffix.add("II."); suffix.add("III."); // Set up type system TypeSystemDescription tsd = new TypeSystemDescription_impl(); tsd.addType("Token", "", CAS.TYPE_NAME_ANNOTATION); // Create CAS CAS jcas = CasCreationUtils.createCas(tsd, null, null); jcas.setDocumentText(aText); Type tokenType = jcas.getTypeSystem().getType("Token"); Feature beginFeature = tokenType.getFeatureByBaseName("begin"); // Create tokens in CAS for (int i = 0; i < aOffsets.length; i += 2) { jcas.addFsToIndexes(jcas.createAnnotation(tokenType, aOffsets[i], aOffsets[i+1])); } // List the tokens in the CAS for (AnnotationFS token : jcas.getAnnotationIndex(tokenType)) { System.out.printf("Starting with %s%n", token.getCoveredText()); } // Merge some tokens, in particular "Franz" "III." -> "Franz III." and "Friedrich" "II." // into "Friedrich II." AnnotationFS previous = null; List<AnnotationFS> toDelete = new ArrayList<>(); for (AnnotationFS token : jcas.getAnnotationIndex(tokenType)) { if (previous != null && names.contains(previous.getCoveredText()) && suffix.contains(token.getCoveredText())) { token.setIntValue(beginFeature, previous.getBegin()); toDelete.add(previous); } previous = token; } // Remove the no longer necessary tokens ("Friedrich" and "Franz"), since we expanded the // following tokens "III." and "II." 
to include their text Set<String> removedWords = new HashSet<String>(); for (AnnotationFS token : toDelete) { System.out.printf("Removing %s%n", token.getCoveredText()); removedWords.add(token.getCoveredText()); jcas.removeFsFromIndexes(token); } // Check if the tokens that we wanted to remove are really gone for (AnnotationFS token : jcas.getAnnotationIndex(tokenType)) { System.out.printf("Remaining %s%n", token.getCoveredText()); if (removedWords.contains(token.getCoveredText())) { org.junit.Assert.fail("I saw a zombie!!!"); } } }