-
Type:
Bug
-
Status: Closed
-
Priority:
Major
-
Resolution: Done
-
Affects Version/s: None
-
Fix Version/s: 2.7.0SDK
-
Component/s: Core Java Framework
-
Labels: None
When annotations are removed from indexes, they sometimes come back. The following test case shows how an annotation that has been removed is still present when the index is iterated later.
@Test public void testForZombies() throws Exception { // No zombie here int[] offsets1 = { 0, 4, 5, 11, 12, 21, 22, 25, 26, 29, 30, 35, 36, 40, 41, 50, 51, 60, 61, 64, 64, 65 }; testForZombies("Dies flößte Friedrich II. für seine neue Eroberung Besorgnis ein.", offsets1); // Zombie hiding in here int[] offsets2 = { 0, 3, 4, 7, 8, 13, 14, 18, 19, 22, 23, 33, 34, 35 }; testForZombies("Ich bin Franz III. von Hammerfels !", offsets2); } public void testForZombies(String aText, int[] aOffsets) throws Exception { // Init some dictionaries we ues Set<String> names = new HashSet<String>(); names.add("Friedrich"); names.add("Franz"); Set<String> suffix = new HashSet<String>(); suffix.add("II."); suffix.add("III."); // Set up type system TypeSystemDescription tsd = new TypeSystemDescription_impl(); tsd.addType("Token", "", CAS.TYPE_NAME_ANNOTATION); // Create CAS CAS jcas = CasCreationUtils.createCas(tsd, null, null); jcas.setDocumentText(aText); Type tokenType = jcas.getTypeSystem().getType("Token"); Feature beginFeature = tokenType.getFeatureByBaseName("begin"); // Create tokens in CAS for (int i = 0; i < aOffsets.length; i += 2) { jcas.addFsToIndexes(jcas.createAnnotation(tokenType, aOffsets[i], aOffsets[i+1])); } // List the tokens in the CAS for (AnnotationFS token : jcas.getAnnotationIndex(tokenType)) { System.out.printf("Starting with %s%n", token.getCoveredText()); } // Merge some tokens, in particular "Franz" "III." -> "Franz III." and "Friedrich" "II." // into "Friedrich II." AnnotationFS previous = null; List<AnnotationFS> toDelete = new ArrayList<>(); for (AnnotationFS token : jcas.getAnnotationIndex(tokenType)) { if (previous != null && names.contains(previous.getCoveredText()) && suffix.contains(token.getCoveredText())) { token.setIntValue(beginFeature, previous.getBegin()); toDelete.add(previous); } previous = token; } // Remove the no longer necessary tokens ("Friedrich" and "Franz"), since we expanded the // following tokens "III." and "II." 
to include their text Set<String> removedWords = new HashSet<String>(); for (AnnotationFS token : toDelete) { System.out.printf("Removing %s%n", token.getCoveredText()); removedWords.add(token.getCoveredText()); jcas.removeFsFromIndexes(token); } // Check if the tokens that we wanted to remove are really gone for (AnnotationFS token : jcas.getAnnotationIndex(tokenType)) { System.out.printf("Remaining %s%n", token.getCoveredText()); if (removedWords.contains(token.getCoveredText())) { org.junit.Assert.fail("I saw a zombie!!!"); } } }
- is related to
-
UIMA-4059 Checking for incorrect key modifications
-
- Resolved
-